[PATCH] c++: Fix call to push_tinst_level during satisfaction [PR99214]

2021-02-24 Thread Patrick Palka via Gcc-patches
In the three-parameter version of satisfy_declaration_constraints, when
't' isn't the most general template, then 't' doesn't correspond with
the augmented template arguments 'args', and so the instantiation
context that we push via push_tinst_level isn't quite correct.  This
manifests as misleading diagnostic context lines during satisfaction
failure as in the testcase below for which without this patch we emit
  In substitution of '... static void A<T>::f<U>() [with U = int]'
and with this patch we emit
  In substitution of '... static void A<T>::f<U>() [with U = char; T = int]'.

This patch fixes this by passing the most general template to
push_tinst_level.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

PR c++/99214
* constraint.cc (get_normalized_constraints_from_decl): Fix
formatting.
(satisfy_declaration_constraints): Be lazy about augmenting
'args'.  Pass the most general template to push_tinst_level.

gcc/testsuite/ChangeLog:

PR c++/99214
* g++.dg/concepts/diagnostic16.C: New test.
---
 gcc/cp/constraint.cc | 16 
 gcc/testsuite/g++.dg/concepts/diagnostic16.C | 15 +++
 2 files changed, 23 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/concepts/diagnostic16.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 31e0fb5079a..88963e687a7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -910,11 +910,9 @@ get_normalized_constraints_from_decl (tree d, bool diag = 
false)
  accepting the latter causes the template parameter level of U
  to be reduced in a way that makes it overly difficult substitute
  concrete arguments (i.e., eventually {int, int} during satisfaction.  */
-  if (tmpl)
-  {
-if (DECL_LANG_SPECIFIC(tmpl) && !DECL_TEMPLATE_SPECIALIZATION (tmpl))
-  tmpl = most_general_template (tmpl);
-  }
+  if (tmpl && DECL_LANG_SPECIFIC (tmpl)
+  && !DECL_TEMPLATE_SPECIALIZATION (tmpl))
+tmpl = most_general_template (tmpl);
 
   /* If we're not diagnosing errors, use cached constraints, if any.  */
   if (!diag)
@@ -3157,12 +3155,14 @@ satisfy_declaration_constraints (tree t, tree args, 
sat_info info)
 
   gcc_assert (TREE_CODE (t) == TEMPLATE_DECL);
 
-  args = add_outermost_template_args (t, args);
-
   tree result = boolean_true_node;
   if (tree norm = normalize_template_requirements (t, info.noisy ()))
 {
-  if (!push_tinst_level (t, args))
+  args = add_outermost_template_args (t, args);
+  tree gen_tmpl = t;
+  if (DECL_LANG_SPECIFIC (t) && !DECL_TEMPLATE_SPECIALIZATION (t))
+   gen_tmpl = most_general_template (t);
+  if (!push_tinst_level (gen_tmpl, args))
return result;
   tree pattern = DECL_TEMPLATE_RESULT (t);
   push_access_scope (pattern);
diff --git a/gcc/testsuite/g++.dg/concepts/diagnostic16.C 
b/gcc/testsuite/g++.dg/concepts/diagnostic16.C
new file mode 100644
index 000..b8d586e9a21
--- /dev/null
+++ b/gcc/testsuite/g++.dg/concepts/diagnostic16.C
@@ -0,0 +1,15 @@
+// PR c++/99214
+// { dg-do compile { target c++20 } }
+
+template 
+struct A {
+  template  static void f() requires ([] { return U::fail; }()); // { 
dg-error "fail" }
+  template  static void f();
+};
+
+int main() {
+  A::f();
+}
+
+// This matches the context line "In substitution of '... [with U = char; T = 
int]'"
+// { dg-message "U = char; T = int" "" { target *-*-* } 0 }
-- 
2.30.1.602.g966e671106



[PATCH] c++: Fix pretty printing of context of local class [PR99213]

2021-02-24 Thread Patrick Palka via Gcc-patches
My r10-7705 patch for PR94521 made us set TFF_NO_FUNCTION_ARGUMENTS when
pretty printing the function scope of a local class type in order to
eliminate infinite recursion with a function signature that contains
decltype([]{}).  But due to the way dump_function_decl works, this
change regressed our pretty printing of local class types whose context
contains a class template specialization, as in the testcase below, in
which we wrongly pretty print the two local types as 'A<T>::f<U>::S1'
and 'B<T>::f::S2'.

This patch makes dump_scope pass TFF_NO_TEMPLATE_BINDINGS instead of
TFF_NO_FUNCTION_ARGUMENTS when pretty printing a function scope.  It
appears this is the strictly better flag to use: it avoids the infinite
recursion issue, it restores pretty printing of the function parameter
list, and it stops dump_function_decl from trying to print a function
template specialization in its own weird way.

Summary of pretty printing differences for the below testcase:

  r10-7704:   A<T>::f<U>() [with U = char; T = int]::S1
              B<T>::f() [with T = int]::S2

  r10-7705:   A<T>::f<U>::S1
              B<T>::f::S2

  this patch: A<int>::f<char>()::S1
              B<int>::f()::S2

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

PR c++/99213
PR c++/94521
* error.c (dump_scope): Pass TFF_NO_TEMPLATE_BINDINGS instead of
TFF_NO_FUNCTION_ARGUMENTS when dumping a function scope.

gcc/testsuite/ChangeLog:

PR c++/99213
PR c++/94521
* g++.dg/diagnostic/local1.C: New test.
---
 gcc/cp/error.c   |  4 +---
 gcc/testsuite/g++.dg/diagnostic/local1.C | 25 
 2 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/local1.C

diff --git a/gcc/cp/error.c b/gcc/cp/error.c
index 5213a8030ca..ff4ae6f4b23 100644
--- a/gcc/cp/error.c
+++ b/gcc/cp/error.c
@@ -243,9 +243,7 @@ dump_scope (cxx_pretty_printer *pp, tree scope, int flags)
 }
   else if ((flags & TFF_SCOPE) && TREE_CODE (scope) == FUNCTION_DECL)
 {
-  if (DECL_USE_TEMPLATE (scope))
-   f |= TFF_NO_FUNCTION_ARGUMENTS;
-  dump_function_decl (pp, scope, f);
+  dump_function_decl (pp, scope, f | TFF_NO_TEMPLATE_BINDINGS);
   pp_cxx_colon_colon (pp);
 }
 }
diff --git a/gcc/testsuite/g++.dg/diagnostic/local1.C 
b/gcc/testsuite/g++.dg/diagnostic/local1.C
new file mode 100644
index 000..5905b571a44
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/local1.C
@@ -0,0 +1,25 @@
+// PR c++/99213
+// { dg-do compile { target c++14 } }
+
+template 
+struct A {
+  template 
+  static auto f() {
+struct S1{};
+return S1{};
+  }
+};
+
+using type = void;
+using type = decltype(A::f()); // { dg-error 
"A::f\\(\\)::S1"  }
+
+template 
+struct B {
+  static auto f() {
+struct S2{};
+return S2{};
+  }
+};
+
+using type = void;
+using type = decltype(B::f()); // { dg-error "B::f\\(\\)::S2"  }
-- 
2.30.1.602.g966e671106



Re: [PATCH v2] rs6000: Convert the vector element register to SImode [PR98914]

2021-02-24 Thread Xionghu Luo via Gcc-patches



On 2021/2/25 00:57, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Feb 24, 2021 at 09:06:24AM +0800, Xionghu Luo wrote:
>> vec_insert defines the element argument type to be signed int by ELFv2
>> ABI, When expanding a vector with a variable rtx, convert the rtx type
>> SImode.
> 
> But that is true for the intrinsics, not for all other callers of
> rs6000_expand_vector_init.  See
>  as well?
> 
> So I don't think you do this in the right place.  You can convince me
> with good arguments of course :-)

Thanks for pointing that out.  It seems we should convert the type to
DImode in rs6000_expand_vector_set_var_p9 and rs6000_expand_vector_set_var_p8
to support both usages?

 
PS: for "vec_insert (i, u, n)" usage when n is long, what should the front-end
do in altivec_resolve_overloaded_builtin to follow the ELFv2 rule?  Currently
there is no warning/error message or conversion there, and the
INTEGRAL_TYPE_P check accepts a much larger range of types than signed int.


gcc/config/rs6000/rs6000-c.c
altivec_resolve_overloaded_builtin
{
...
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
goto bad;
...
}



Updated the back-end patch as below.


0001-rs6000-Convert-the-vector-set-variable-idx-to-DImode.patch


vec_insert defines the element argument type to be signed int by ELFv2
ABI.  When expanding a vector with a variable rtx, convert the rtx type
to DImode to support both intrinsic usage and other callers from
rs6000_expand_vector_init produced by v[k] = val when k is long type.

gcc/ChangeLog:

2021-02-25  Xionghu Luo  

PR target/98914
* config/rs6000/rs6000.c (rs6000_expand_vector_set_var_p9):
Convert idx to DImode.
(rs6000_expand_vector_set_var_p8): Likewise.

gcc/testsuite/ChangeLog:

2021-02-25  Xionghu Luo  

PR target/98914
* gcc.target/powerpc/pr98914.c: New test.
---
 gcc/config/rs6000/rs6000.c | 33 +-
 gcc/testsuite/gcc.target/powerpc/pr98914.c | 11 
 2 files changed, 30 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr98914.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..48eb91132a9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7000,11 +7000,15 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, 
rtx idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
 
-  rtx tmp = gen_reg_rtx (GET_MODE (idx));
+  machine_mode idx_mode = GET_MODE (idx);
+  rtx tmp = gen_reg_rtx (DImode);
+  if (idx_mode != DImode)
+tmp = convert_modes (DImode, idx_mode, idx, 0);
+  else
+tmp = idx;
+
   int width = GET_MODE_SIZE (inner_mode);
 
   gcc_assert (width >= 1 && width <= 8);
@@ -7012,9 +7016,7 @@ rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx 
idx)
   int shift = exact_log2 (width);
   /* Generate the IDX for permute shift, width is the vector element size.
  idx = idx * width.  */
-  emit_insn (gen_ashlsi3 (tmp, idx, GEN_INT (shift)));
-
-  tmp = convert_modes (DImode, SImode, tmp, 1);
+  emit_insn (gen_ashldi3 (tmp, tmp, GEN_INT (shift)));
 
   /*  lvsrv1,0,idx.  */
   rtx pcvr = gen_reg_rtx (V16QImode);
@@ -7047,27 +7049,31 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx val, 
rtx idx)
 
   gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
 
-  gcc_assert (GET_MODE (idx) == E_SImode);
-
   machine_mode inner_mode = GET_MODE (val);
   HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
 
-  rtx tmp = gen_reg_rtx (GET_MODE (idx));
   int width = GET_MODE_SIZE (inner_mode);
 
+  machine_mode idx_mode = GET_MODE (idx);
+  rtx tmp = gen_reg_rtx (DImode);
+  if (idx_mode != DImode)
+tmp = convert_modes (DImode, idx_mode, idx, 0);
+  else
+tmp = idx;
+
   gcc_assert (width >= 1 && width <= 4);
 
   if (!BYTES_BIG_ENDIAN)
 {
   /*  idx = idx * width.  */
-  emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
+  emit_insn (gen_muldi3 (tmp, tmp, GEN_INT (width)));
   /*  idx = idx + 8.  */
-  emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (8)));
+  emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (8)));
 }
   else
 {
-  emit_insn (gen_mulsi3 (tmp, idx, GEN_INT (width)));
-  emit_insn (gen_subsi3 (tmp, GEN_INT (24 - width), tmp));
+  emit_insn (gen_muldi3 (tmp, idx, GEN_INT (width)));
+  emit_insn (gen_subdi3 (tmp, GEN_INT (24 - width), tmp));
 }
 
   /*  lxv vs33, mask.
@@ -7118,7 +7124,6 @@ rs6000_expand_vector_set_var_p8 (rtx target, rtx val, rtx 
idx)
   emit_insn (gen_rtx_SET (val_v16qi, sub_val));
 
   /*  lvsl13,0,idx.  */
-  tmp = convert_modes (DImode, SImode, tmp, 1);
   rtx pcv = gen_reg_rtx (V16QImode);
   emit_insn (gen_altivec_lvsl_reg (pcv, tmp));
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr98914.c 
b/gcc/testsuite/gcc.target/powerpc/pr98914.c
new file mode 100644
index 

[PATCH] gcc.misc-tests/outputs.exp: assert unique test-names

2021-02-24 Thread Hans-Peter Nilsson via Gcc-patches
The gcc.misc-tests/outputs.exp tests can take some effort to
digest.

Navigating and debugging causes for failing tests here isn't
helped by the existence of tests with duplicate names.
Let's stop that from happening.  This requires that test-run
output is actually reviewed, as Tcl errors don't stop the
test-run, but then again there's no such dejagnu construct
that I know of.

Tested x86_64-pc-linux-gnu.

Ok to commit?  Or is a renaming patch appending a
number-suffix, like:

--- outputs.exp.orig3   2021-02-25 06:13:28.304243791 +0100
+++ outputs.exp 2021-02-25 06:13:51.575457825 +0100
@@ -280,8 +280,8 @@ if { "$aout" != "" } then {
 }
 
 # Driver-chosen outputs.
-outest "$b asm default 1" $sing "-S" {} {{-0.s}}
-outest "$b asm default 2" $mult "-S" {} {{-1.s -2.s}}
+outest "$b-1 asm default 1" $sing "-S" {} {{-0.s}}
+outest "$b-2 asm default 2" $mult "-S" {} {{-1.s -2.s}}
...

...better and ok to commit?  (IMHO: yes, much easier to follow)

gcc/testsuite:
* gcc.misc-tests/outputs.exp: Append discriminating
suffixes to tests with duplicate names.
(outest): Assert that each running test has a unique
name.
---
 gcc/testsuite/gcc.misc-tests/outputs.exp | 36 +++-
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp 
b/gcc/testsuite/gcc.misc-tests/outputs.exp
index ebd61448bfdd..0e5c1a55ce87 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -116,8 +116,16 @@ if [info exists env(MAKEFLAGS)] {
 # it weren't for
 # https://core.tcl-lang.org/tcl/tktview?name=5bbd044812), but .{i,s,o}
 # and .[iso] will pass even if only the .o is present.
+array unset outests *
 proc outest { test sources opts dirs outputs } {
 global b srcdir subdir
+global outests
+
+if { [info exists outests($test)] } {
+   error "multiple outputs.exp tests are named \"$test\", but for sanity, 
test-names must be unique"
+}
+set outests($test) 1
+
 set src {}
 foreach s $sources {
lappend src $srcdir/$subdir/$b$s
@@ -307,10 +315,10 @@ outest "$b exe savetmp named2" $mult "-o $b.exe 
-save-temps" {} {{--1.i --1.s --
 
 # Additional files are created when an @file is used
 if !$skip_atsave {
-outest "$b exe savetmp namedb" $sing "@/dev/null -o $b.exe -save-temps" {} 
{{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -o $b.exe -save-temps" {} 
{{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_args !0 .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
!!$gld .ld1_args !0 .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -L dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
.args.3 !!$gld .ld1_args !0 .exe}}
+outest "$b exe savetmp namedb-2" $sing "@/dev/null -o $b.exe -save-temps" {} 
{{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
+outest "$b exe savetmp named2-2" $mult "@/dev/null -o $b.exe -save-temps" {} 
{{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_args !0 .exe}}
+outest "$b exe savetmp named2-3" $mult "@/dev/null -I dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
!!$gld .ld1_args !0 .exe}}
+outest "$b exe savetmp named2-4" $mult "@/dev/null -I dummy -L dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
.args.3 !!$gld .ld1_args !0 .exe}}
 }
 
 # Setting the main output to a dir selects it as the default aux
@@ -400,9 +408,9 @@ outest "$b exe soddovr namedir2" $mult "-o o/$b.exe 
-save-temps=obj -dumpdir ./"
 outest "$b exe scddovr namedir0" $sing "-o o/$b-0.exe -save-temps=cwd -dumpdir 
o/" {o/} {{-0.i -0.s -0.o -0.exe} {}}
 outest "$b exe scddovr namedirb" $sing "-o o/$b.exe -save-temps=cwd -dumpdir 
o/" {o/} {{-0.i -0.s -0.o .exe} {}}
 outest "$b exe scddovr namedir2" $mult "-o o/$b.exe -save-temps=cwd -dumpdir 
o/" {o/} {{-1.i -1.s -1.o -2.i -2.s -2.o .exe} {}}
-outest "$b exe ddstovr namedir0" $sing "-o $b-0.exe -save-temps -dumpdir o/" 
{o/} {{-0.i -0.s -0.o} {-0.exe}}
-outest "$b exe ddstovr namedirb" $sing "-o $b.exe -save-temps -dumpdir o/" 
{o/} {{-0.i -0.s -0.o} {.exe}}
-outest "$b exe ddstovr namedir2" $mult "-o $b.exe -save-temps -dumpdir o/" 
{o/} {{-1.i -1.s -1.o -2.i -2.s -2.o} {.exe}}
+outest "$b exe ddstovr namedir0-2" $sing "-o $b-0.exe -save-temps -dumpdir o/" 
{o/} {{-0.i -0.s -0.o} {-0.exe}}
+outest "$b exe ddstovr namedirb-2" $sing "-o $b.exe -save-temps -dumpdir o/" 
{o/} {{-0.i -0.s -0.o} {.exe}}
+outest "$b exe ddstovr namedir2-2" $mult "-o $b.exe -save-temps -dumpdir o/" 
{o/} {{-1.i -1.s -1.o -2.i -2.s -2.o} {.exe}}
 
 
 # Compiler- and driver-generated aux and dump outputs.
@@ -661,14 +669,14 @@ outest "$b lto mult nameddir" $mult "-o dir/$b.exe -O2 
-flto -flto-partition=one
 

[PATCH] outputs.exp: skip @file -save-temps if target has -L or -I

2021-02-24 Thread Hans-Peter Nilsson via Gcc-patches
The outputs.exp tests check what temporary files are created
and left behind with e.g. -save-temps.

Additional files are created in presence of @file option.
Adding an -I or -L option causes *another* temporary file to
appear.  I take it that's deliberate, as there are tests for
that behavior.

For native testing, the default test-framework baseboard
file unix.exp doesn't add any -I or -L options and all tests
pass.  For a newlib target however, you'll have a couple of
-L options (see the nopts handling in outputs.exp), leading
to (cris-elf with --target_board=cris-sim):

Running /x/gcc/gcc/testsuite/gcc.misc-tests/outputs.exp ...
FAIL: outputs exe savetmp namedb: extra
outputs.args.1
FAIL: outputs exe savetmp named2: extra
outputs.args.1
FAIL: outputs exe savetmp named2: extra
outputs.args.3
FAIL: outputs lto sing unnamed: extra
a.args.1

The failing tests are among the actual tests that check the
behavior of @file, and are confused by the additional -L.

Identify presence of -I or -L from the test framework and
skip those tests.

Tested cris-elf and x86_64-pc-linux-gnu.

Ok to commit?

gcc/testsuite:
* gcc.misc-tests/outputs.exp: Skip @file -save-temps
tests if target test-framework has -L or -I options.
---
 gcc/testsuite/gcc.misc-tests/outputs.exp | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp 
b/gcc/testsuite/gcc.misc-tests/outputs.exp
index 4d904bde31d5..ebd61448bfdd 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -57,10 +57,18 @@ set gld [check_effective_target_gld]
 # We do not compile to an executable, because that requires naming an output.
 set link_options ""
 set dest [target_info name]
+set skip_atsave 0
 foreach i { ldflags libs ldscript } {
 if {[board_info $dest exists $i]} {
set opts [board_info $dest $i]
set nopts [gcc_adjust_linker_flags_list $opts]
+
+   # Options -I and -L affect the behavior of @files
+   # wrt. temporary files.  Skip @file -save-temps tests if we
+   # have any of them in the board-flags.
+   if { [string match "* -\[LI\]*" " $nopts"]} {
+   set skip_atsave 1
+   }
foreach opt $nopts {
append link_options " additional_flags=$opt"
}
@@ -298,10 +306,12 @@ outest "$b exe savetmp namedb" $sing "-o $b.exe 
-save-temps" {} {{--0.i --0.s --
 outest "$b exe savetmp named2" $mult "-o $b.exe -save-temps" {} {{--1.i --1.s 
--1.o --2.i --2.s --2.o .exe}}
 
 # Additional files are created when an @file is used
+if !$skip_atsave {
 outest "$b exe savetmp namedb" $sing "@/dev/null -o $b.exe -save-temps" {} 
{{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
 outest "$b exe savetmp named2" $mult "@/dev/null -o $b.exe -save-temps" {} 
{{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_args !0 .exe}}
 outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
!!$gld .ld1_args !0 .exe}}
 outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -L dummy -o $b.exe 
-save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 
.args.3 !!$gld .ld1_args !0 .exe}}
+}
 
 # Setting the main output to a dir selects it as the default aux
 # location.
@@ -657,8 +667,10 @@ outest "$b lto sing named" $sing "-o $b.exe -O2 -flto 
-fno-use-linker-plugin -fl
 outest "$b lto mult named" $mult "-o $b.exe -O2 -flto -fno-use-linker-plugin 
-flto-partition=one -fdump-ipa-icf-optimized -fdump-rtl-final -fstack-usage" {} 
{{--1.c.???i.icf --1.c.???r.final --2.c.???i.icf --2.c.???r.final .wpa.???i.icf 
.ltrans0.ltrans.???r.final .ltrans0.ltrans.su .exe}}
 outest "$b lto sing nameddir" $sing "-o dir/$b.exe -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage" {dir/} {{--0.c.???i.icf --0.c.???r.final 
.wpa.???i.icf .ltrans0.ltrans.???r.final .ltrans0.ltrans.su .exe} {}}
 outest "$b lto mult nameddir" $mult "-o dir/$b.exe -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage" {dir/} {{--1.c.???i.icf --1.c.???r.final 
--2.c.???i.icf --2.c.???r.final .wpa.???i.icf .ltrans0.ltrans.???r.final 
.ltrans0.ltrans.su .exe} {}}
+if !$skip_atsave {
 outest "$b lto sing unnamed" $sing "@/dev/null -O2 -flto 
-fno-use-linker-plugin -flto-partition=one -fdump-ipa-icf-optimized 
-fdump-rtl-final -fstack-usage -save-temps $oaout" {} {{a--0.c.???i.icf 
a--0.c.???r.final a.wpa.???i.icf a.ltrans0.ltrans.???r.final 
a.ltrans0.ltrans.su a--0.o a--0.s a--0.i a.ltrans0.o a.ltrans.out 
a.ltrans0.ltrans.o a.ltrans0.ltrans_args a.args.0 a.ltrans0.ltrans.s 
a.wpa.args.0 a.lto_args a.ld1_args a.ltrans_args a.ltrans0.ltrans.args.0 
a.ld_args $aout}}
 }
+}
 
 # -dumpbase without -dumpdir.  The trailing dumppfx dash after it is
 # combined with dumpbase turns into a period when passed to lto as
-- 
2.11.0



Re: [RFC][patch for gcc12][version 1] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-02-24 Thread Kees Cook via Gcc-patches
(please keep me in CC, I'm not subscribed...)

On Thu Feb 18, 2021 Qing Zhao said:
> Initialize automatic variables with new first class option 
> -ftrivial-auto-var-init=[uninitialized|pattern|zero]

Yay! I'm really excited to see this. Thank you for working on
it! I've built GCC with this applied, and it works out of the box
for a Linux kernel build, which correctly detects the availability
of -ftrivial-auto-var-init=[pattern|zero] for the respective
CONFIG_INIT_STACK_ALL_PATTERN and CONFIG_INIT_STACK_ALL_ZERO options.

The output from the kernel's CONFIG_TEST_STACKINIT module shows coverage
for most uninitialized cases. Yay! :)

It looks like there are still some issues with padding and pre-case
switch variables. Here's the test output, FWIW:

test_stackinit: u8_zero ok
test_stackinit: u16_zero ok
test_stackinit: u32_zero ok
test_stackinit: u64_zero ok
test_stackinit: char_array_zero ok
test_stackinit: small_hole_zero ok
test_stackinit: big_hole_zero ok
test_stackinit: trailing_hole_zero ok
test_stackinit: packed_zero ok
test_stackinit: small_hole_dynamic_partial ok
test_stackinit: big_hole_dynamic_partial ok
test_stackinit: trailing_hole_dynamic_partial ok
test_stackinit: packed_dynamic_partial ok
test_stackinit: small_hole_static_partial ok
test_stackinit: big_hole_static_partial ok
test_stackinit: trailing_hole_static_partial ok
test_stackinit: packed_static_partial ok
test_stackinit: small_hole_static_all FAIL (uninit bytes: 3)
test_stackinit: big_hole_static_all FAIL (uninit bytes: 61)
test_stackinit: trailing_hole_static_all FAIL (uninit bytes: 7)
test_stackinit: packed_static_all ok
test_stackinit: small_hole_dynamic_all FAIL (uninit bytes: 3)
test_stackinit: big_hole_dynamic_all FAIL (uninit bytes: 61)
test_stackinit: trailing_hole_dynamic_all FAIL (uninit bytes: 7)
test_stackinit: packed_dynamic_all ok
test_stackinit: small_hole_runtime_partial ok
test_stackinit: big_hole_runtime_partial ok
test_stackinit: trailing_hole_runtime_partial ok
test_stackinit: packed_runtime_partial ok
test_stackinit: small_hole_runtime_all ok
test_stackinit: big_hole_runtime_all ok
test_stackinit: trailing_hole_runtime_all ok
test_stackinit: packed_runtime_all ok
test_stackinit: u8_none ok
test_stackinit: u16_none ok
test_stackinit: u32_none ok
test_stackinit: u64_none ok
test_stackinit: char_array_none ok
test_stackinit: switch_1_none FAIL (uninit bytes: 8)
test_stackinit: switch_2_none FAIL (uninit bytes: 8)
test_stackinit: small_hole_none ok
test_stackinit: big_hole_none ok
test_stackinit: trailing_hole_none ok
test_stackinit: packed_none ok
test_stackinit: user ok
test_stackinit: failures: 8

The kernel's test for this is a mess[1] of macros I used to avoid losing
my sanity from cut/pasting, but it makes the tests hard to read. To
break it out, the failing cases are due to padding, as seen with the
"test_small_hole", "test_big_hole", and "test_trailing_hole" structures:

/* Simple structure with padding likely to be covered by compiler. */
struct test_small_hole {
size_t one;
char two;
/* 3 byte padding hole here. */
int three;
unsigned long four;
};

/* Try to trigger unhandled padding in a structure. */
struct test_aligned {
u32 internal1;
u64 internal2;
} __aligned(64);

struct test_big_hole {
u8 one;
u8 two;
u8 three;
/* 61 byte padding hole here. */
struct test_aligned four;
} __aligned(64);

struct test_trailing_hole {
char *one;
char *two;
char *three;
char four;
/* "sizeof(unsigned long) - 1" byte padding hole here. */
};

They fail when they're statically initialized (either fully or
partially), for example:

struct test_..._hole instance = { .two = ..., };

or

struct test_..._hole instance = { .one = ...,
  .two = ...,
  .three = ...,
  .four = ...,
};

The last case is for switch variables outside of case statements, like
"var" here:

switch (path) {
unsigned long var;

case ..:
...
case ..:
...
...
}


I'm really looking forward to having this available. Thanks again!

-Kees

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/test_stackinit.c

-- 
Kees Cook


Re: [PATCH] c++: Private parent access check for using decls [PR19377]

2021-02-24 Thread Jason Merrill via Gcc-patches

On 2/24/21 4:17 PM, Anthony Sharp wrote:

"special"



It wouldn't be my code if it didn't have sp3ling mstakes innit!
Actually to be fair I already changed that spelling mistake a few days
ago in my local code ;)

I was actually thinking about this last night as I was falling asleep
(as you do) and I realised that the whole of my using decl lookup is
redundant. I can simply do this (formatting probably messes up here):

/* 1.  If the "using" keyword is used to inherit DECL within the parent,
  this may cause DECL to be private, so we should return the using
  statement as the source of the problem.

  Scan the fields of PARENT_BINFO and see if there are any using decls.  If
  there are, see if they inherit DECL.  If they do, that's where DECL must
  have been declared private.  */

   for (tree parent_field = TYPE_FIELDS (BINFO_TYPE (parent_binfo));
parent_field;
parent_field = DECL_CHAIN (parent_field))
 {
   /* Not necessary, but also check TREE_PRIVATE for the sake of
   eliminating obviously non-relevant using decls.  */
   if (TREE_CODE (parent_field) == USING_DECL
  && TREE_PRIVATE (parent_field))
{
/* If the using statement inherits DECL, it is the source of the
  access failure, so return it.  */
  if (cp_tree_equal (strip_using_decl (parent_field), decl))
return parent_field;
}
 }

I was wrong to say that the using decl does not store "where it came
from/what it inherits" - that's exactly what strip_using_decl
achieves. I think the problem was that when I did my initial testing
in trying out ways to get the original decl, I didn't strip it, so the
comparison failed, which led me to make the whole redundant lookup,
blah blah blah.

I've run a quick test and it seems to work, even with the overloads.

Will test it some more and if all's good I will probably send a new
patch some time this weekend.


Sounds good, though strip_using_decl (parent_field) may be overloaded if 
the using-decl brings in multiple functions with that name.


Jason



Re: [PATCH] c++: const_cast of null pointer in constant expr [PR99176]

2021-02-24 Thread Jason Merrill via Gcc-patches

On 2/24/21 5:34 PM, Marek Polacek wrote:

Here we reject

   constexpr const int *p = nullptr;
   constexpr int *q = const_cast<int *>(p);

with "conversion of 'const int*' null pointer to 'int*' is not a
constant expression", which seems bogus.  This code has been rejected
since r238909 which added the can_convert check when converting a null
pointer.  I'm not finding any standard rule that this check was supposed
to enforce.  The original discussion was here

and here
.

Since can_convert never assumes a C-style cast, it rejects casting
away constness as in the test above and in:

   constexpr int *q = (int *)(const int *) nullptr;

Removing the check only breaks constexpr-nullptr-2.C by not giving any
diagnostic for line 229:

   constexpr B *pb2 = static_cast<B*>(pa0);  // { dg-error "not a constant expression" }

but the cast seems to be valid: we do [expr.static.cast]/7, and
[expr.const] only says that a reinterpret_cast and converting from
void* are invalid in constexpr.  The can_convert check rejected converting
from void *, but only when converting from a null pointer, so it's not
good enough.  So I've added a check to catch conversions from cv void*.
I realize it's not a great time to be adding additional checking, but
removing the can_convert check would then technically be a regression.
(I could perhaps limit the new check to only trigger for integer_zerop
and then remove it in GCC 12.)


That sounds safest.


Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

DR 1312
PR c++/99176
* constexpr.c (cxx_eval_constant_expression): Reject casting
from void * as per DR 1312.  Don't check can_convert.

gcc/testsuite/ChangeLog:

DR 1312
PR c++/99176
* g++.dg/cpp0x/constexpr-nullptr-2.C: Adjust dg-error.
* g++.dg/cpp0x/constexpr-cast2.C: New test.
* g++.dg/cpp0x/constexpr-cast3.C: New test.
---
  gcc/cp/constexpr.c| 49 ---
  gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C  | 16 ++
  gcc/testsuite/g++.dg/cpp0x/constexpr-cast3.C  | 14 ++
  .../g++.dg/cpp0x/constexpr-nullptr-2.C|  4 +-
  4 files changed, 64 insertions(+), 19 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-cast3.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 377fe322ee8..adf575d3dc6 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -6653,6 +6653,37 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
return t;
  }
  
+	/* [expr.const]: a conversion from type cv void* to a pointer-to-object

+  type cannot be part of a core constant expression as a resolution to
+  DR 1312.  */
+   if (TYPE_PTROB_P (type)
+   && TYPE_PTR_P (TREE_TYPE (op))
+   && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (op)))
+   /* Inside a call to std::construct_at or to
+  std::allocator::{,de}allocate, we permit casting from void*
+  because that is compiler-generated code.  */
+   && !(ctx->call
+&& ctx->call->fundef
+&& (is_std_construct_at (ctx->call->fundef->decl)
+|| is_std_allocator_allocate (ctx->call->fundef->decl


I wonder about adding overloads that take constexpr_call* so you don't 
need the non-null checks here.  Up to you.



+ {
+   /* Likewise, don't error when casting from void* when OP is
+   uninit and similar.  */
+   tree sop = tree_strip_nop_conversions (op);
+   if (TREE_CODE (sop) == ADDR_EXPR
+   && VAR_P (TREE_OPERAND (sop, 0))
+   && DECL_ARTIFICIAL (TREE_OPERAND (sop, 0)))
+ /* OK */;
+   else
+ {
+   if (!ctx->quiet)
+ error_at (loc, "cast from %qT is not allowed",
+   TREE_TYPE (op));
+   *non_constant_p = true;
+   return t;
+ }
+ }
+
if (TREE_CODE (op) == PTRMEM_CST && !TYPE_PTRMEM_P (type))
  op = cplus_expand_constant (op);
  
@@ -6671,26 +6702,10 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t,

if (TYPE_REF_P (type))
  {
if (!ctx->quiet)
- error_at (loc,
-   "dereferencing a null pointer");
+ error_at (loc, "dereferencing a null pointer");
*non_constant_p = true;
return t;
  }
-   else if (TYPE_PTR_P (TREE_TYPE (op)))
- {
-   tree from = TREE_TYPE (op);
-
-   if (!can_convert (type, from, tf_none))
- {
-

Re: [PATCH] avoid -Wnonnull for dynamic_cast (PR 99251)

2021-02-24 Thread Jason Merrill via Gcc-patches

On 2/24/21 5:25 PM, Martin Sebor wrote:

In r11-6900 (PR 98646 - static_cast confuses -Wnonnull) we decided
that issuing -Wnonnull for dereferencing the result of dynamic_cast
was helpful despite the false positives it causes when the pointer
is guaranteed not to be null because of a prior test.

The test case in PR 99251 along with the feedback I got from Martin
Liska have convinced me it was not the right decision.

The attached patch arranges for dynamic_cast to also suppress -Wnonnull
analogously to static_cast.  Since there already is a helper function
that builds the if-not-null test (ifnonnull) and sets TREE_NO_WARNING,
I factored out the corresponding code from build_base_path that sets
the additional TREE_NO_WARNING bit for static_cast into the function
and called it from both places.  I also renamed the function to make
its purpose clearer and for consistency with other build_xxx APIs.


Let's call it build_if_nonnull, as it builds the COND_EXPR as well as 
the test.



+  /* The dynamic_cast might fail but so a warning might be justified
+ but not when the operand is guarded.  See pr99251.  */
+  if (B *q = p->bptr ())
+dynamic_cast(q)->g ();// { dg-bogus "\\\[-Wnonnull" }


This guard is no more necessary than it is for static_cast; both cases 
deal with null arguments.  Let's not add these checks to the testcases.


This guard doesn't check for the mentioned case of dynamic_cast failing 
because the B* does not in fact point to a C.


I think we can just change the dg-warning to dg-bogus.  Sure, 
dynamic_cast might fail, but AFAICT -Wnonnull isn't supposed to warn 
about arguments that *might* be null, only arguments that are *known* to 
be null.


Jason



Re: [PATCH v3 6/6] RISC-V: Fix matches against subreg with a bytenum of 0 in riscv.md

2021-02-24 Thread Kito Cheng via Gcc-patches
Hi Jim:

Could you take a look at this patch?
I am not sure whether it's the right change for big-endian.

On Thu, Feb 25, 2021 at 6:39 AM Marcus Comstedt  wrote:
>
> These all intend the least significant subpart of the register.
> Use the same endian-neutral "subreg_lowpart_operator" predicate that
> ARM does instead.
>
> gcc/
> * config/riscv/predicates.md (subreg_lowpart_operator): New predicate
> * config/riscv/riscv.md (*addsi3_extended2, *subsi3_extended2)
> (*negsi2_extended2, *mulsi3_extended2, *si3_mask)
> (*si3_mask_1, *di3_mask, *di3_mask_1)
> (*si3_extend_mask, *si3_extend_mask_1): Use
> new predicate "subreg_lowpart_operator"
> ---
>  gcc/config/riscv/predicates.md |  5 +++
>  gcc/config/riscv/riscv.md  | 70 +-
>  2 files changed, 40 insertions(+), 35 deletions(-)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index ef821add663..23211513554 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -198,6 +198,11 @@
>  (define_predicate "signed_order_operator"
>(match_code "eq,ne,lt,le,ge,gt"))
>
> +(define_predicate "subreg_lowpart_operator"
> +  (ior (match_code "truncate")
> +   (and (match_code "subreg")
> +(match_test "subreg_lowpart_p (op)"
> +
>  (define_predicate "fp_native_comparison"
>(match_code "eq,lt,le,gt,ge"))
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index fcdcc3abaa0..c3687d57047 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -480,9 +480,9 @@
>  (define_insn "*addsi3_extended2"
>[(set (match_operand:DI   0 "register_operand" "=r,r")
> (sign_extend:DI
> - (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" " r,r")
> - (match_operand:DI 2 "arith_operand"" r,I"))
> -0)))]
> + (match_operator:SI 3 "subreg_lowpart_operator"
> +[(plus:DI (match_operand:DI 1 "register_operand" " r,r")
> +  (match_operand:DI 2 "arith_operand"" r,I"))])))]
>"TARGET_64BIT"
>"add%i2w\t%0,%1,%2"
>[(set_attr "type" "arith")
> @@ -536,9 +536,9 @@
>  (define_insn "*subsi3_extended2"
>[(set (match_operand:DI0 "register_operand" "= r")
> (sign_extend:DI
> - (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" " rJ")
> -  (match_operand:DI 2 "register_operand" "  r"))
> -0)))]
> + (match_operator:SI 3 "subreg_lowpart_operator"
> +   [(minus:DI (match_operand:DI 1 "reg_or_0_operand" " rJ")
> +  (match_operand:DI 2 "register_operand" "  r"))])))]
>"TARGET_64BIT"
>"subw\t%0,%z1,%2"
>[(set_attr "type" "arith")
> @@ -572,8 +572,8 @@
>  (define_insn "*negsi2_extended2"
>[(set (match_operand:DI 0 "register_operand" "=r")
> (sign_extend:DI
> -(subreg:SI (neg:DI (match_operand:DI 1 "register_operand" " r"))
> -   0)))]
> +(match_operator:SI 2 "subreg_lowpart_operator"
> +  [(neg:DI (match_operand:DI 1 "register_operand" " r"))])))]
>"TARGET_64BIT"
>"negw\t%0,%1"
>[(set_attr "type" "arith")
> @@ -627,9 +627,9 @@
>  (define_insn "*mulsi3_extended2"
>[(set (match_operand:DI   0 "register_operand" "=r")
> (sign_extend:DI
> - (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" " r")
> - (match_operand:DI 2 "register_operand" " r"))
> -0)))]
> + (match_operator:SI 3 "subreg_lowpart_operator"
> +   [(mult:DI (match_operand:DI 1 "register_operand" " r")
> + (match_operand:DI 2 "register_operand" " r"))])))]
>"TARGET_MUL && TARGET_64BIT"
>"mulw\t%0,%1,%2"
>[(set_attr "type" "imul")
> @@ -1591,10 +1591,10 @@
>[(set (match_operand:SI 0 "register_operand" "= r")
> (any_shift:SI
> (match_operand:SI 1 "register_operand" "  r")
> -   (subreg:QI
> -(and:SI
> - (match_operand:SI 2 "register_operand"  "r")
> - (match_operand 3 "const_int_operand")) 0)))]
> +   (match_operator 4 "subreg_lowpart_operator"
> +[(and:SI
> +  (match_operand:SI 2 "register_operand"  "r")
> +  (match_operand 3 "const_int_operand"))])))]
>"(INTVAL (operands[3]) & (GET_MODE_BITSIZE (SImode)-1))
> == GET_MODE_BITSIZE (SImode)-1"
>"#"
> @@ -1610,10 +1610,10 @@
>[(set (match_operand:SI 0 "register_operand" "= r")
> (any_shift:SI
> (match_operand:SI 1 "register_operand" "  r")
> -   (subreg:QI
> -(and:DI
> - (match_operand:DI 2 "register_operand"  "r")
> - (match_operand 3 "const_int_operand")) 0)))]
> +  

Re: [PATCH, V2] Add conversions between _Float128 and Decimal.

2021-02-24 Thread Joseph Myers
On Wed, 24 Feb 2021, Segher Boessenkool wrote:

> The other option, which we need for correctness *anyway*, is to have
> libgcc do the conversion without using any specific libc.

Which ends up being quite complicated (see e.g. 
https://www.cl.cam.ac.uk/~jrh13/papers/decimal.pdf for a more detailed 
discussion of how to do such conversions and how to determine the worst 
cases for correct rounding of them).

-- 
Joseph S. Myers
jos...@codesourcery.com


[committed] analyzer: fix false positive on realloc [PR99193]

2021-02-24 Thread David Malcolm via Gcc-patches
PR analyzer/99193 describes various false positives from
-Wanalyzer-mismatching-deallocation on realloc(3) calls
of the form:

|   31 |   void *p = malloc (1024);
|  | ^
|  | |
|  | (1) allocated here (expects deallocation with ‘free’)
|   32 |   void *q = realloc (p, 4096);
|  | ~
|  | |
|  | (2) deallocated with ‘realloc’ here; allocation at (1) 
expects deallocation with ‘free’
|

The underlying issue is that the analyzer has no knowledge of
realloc(3), and realloc has awkward semantics.

Unfortunately, the analyzer is currently structured so that each call
statement can only have at most one successor state; there is no
way to "bifurcate" the state, or have N-way splits into multiple
outcomes.  The existing "on_stmt" code works on a copy of the next
state, updating it in place, rather than copying it and making any
necessary changes.  I did this as an optimization to avoid unnecessary
copying of state objects, but it makes it hard to support multiple
outcomes.  (ideally our state objects would be immutable and thus
support trivial copying, alternatively, C++11 move semantics may
help here)

I attempted a few approaches to implementing bifurcation within the
existing state-update framework, but they were messy and thus likely
buggy; a proper implementation would rework state-updating to
generate copies, but this would be a major change, and seems too
late for GCC 11.

As a workaround, this patch implements enough of realloc(3) to
suppress the false positives.

This fixes the false positives in PR analyzer/99193.
I've filed PR analyzer/99260 to track "properly" implementing realloc(3).

Successfully bootstrapped & regtested on x86_64-pc-linux-gnu.
Pushed to trunk as r11-7381-ga6baafcac5308be1a5d92c0b2a179495b7a24b52

gcc/analyzer/ChangeLog:
PR analyzer/99193
* region-model-impl-calls.cc (region_model::impl_call_realloc): New.
* region-model.cc (region_model::on_call_pre): Call it.
* region-model.h (region_model::impl_call_realloc): New decl.
* sm-malloc.cc (enum wording): Add WORDING_REALLOCATED.
(malloc_state_machine::m_realloc): New field.
(use_after_free::describe_state_change): Add case for
WORDING_REALLOCATED.
(use_after_free::describe_final_event): Likewise.
(malloc_state_machine::malloc_state_machine): Initialize
m_realloc.
(malloc_state_machine::on_stmt): Handle realloc by calling...
(malloc_state_machine::on_realloc_call): New.

gcc/testsuite/ChangeLog:
PR analyzer/99193
* gcc.dg/analyzer/pr99193-1.c: New test.
* gcc.dg/analyzer/pr99193-2.c: New test.
* gcc.dg/analyzer/pr99193-3.c: New test.
* gcc.dg/analyzer/realloc-1.c: New test.
---
 gcc/analyzer/region-model-impl-calls.cc   | 11 
 gcc/analyzer/region-model.cc  |  8 +++
 gcc/analyzer/region-model.h   |  1 +
 gcc/analyzer/sm-malloc.cc | 70 ++-
 gcc/testsuite/gcc.dg/analyzer/pr99193-1.c | 65 +
 gcc/testsuite/gcc.dg/analyzer/pr99193-2.c | 68 ++
 gcc/testsuite/gcc.dg/analyzer/pr99193-3.c | 48 
 gcc/testsuite/gcc.dg/analyzer/realloc-1.c | 55 ++
 8 files changed, 324 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr99193-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr99193-2.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr99193-3.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/realloc-1.c

diff --git a/gcc/analyzer/region-model-impl-calls.cc 
b/gcc/analyzer/region-model-impl-calls.cc
index 72404a5bc61..f83c12b5cb7 100644
--- a/gcc/analyzer/region-model-impl-calls.cc
+++ b/gcc/analyzer/region-model-impl-calls.cc
@@ -428,6 +428,17 @@ region_model::impl_call_operator_delete (const 
call_details )
   return false;
 }
 
+/* Handle the on_call_pre part of "realloc".  */
+
+void
+region_model::impl_call_realloc (const call_details &)
+{
+  /* Currently we don't support bifurcating state, so there's no good
+ way to implement realloc(3).
+ For now, malloc_state_machine::on_realloc_call has a minimal
+ implementation to suppress false positives.  */
+}
+
 /* Handle the on_call_pre part of "strcpy" and "__builtin_strcpy_chk".  */
 
 void
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 2053f8f79bb..96ed549adf6 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -791,6 +791,9 @@ region_model::on_call_pre (const gcall *call, 
region_model_context *ctxt,
impl_call_memset (cd);
return false;
break;
+ case BUILT_IN_REALLOC:
+   impl_call_realloc (cd);
+   return false;
  case BUILT_IN_STRCPY:
  case BUILT_IN_STRCPY_CHK:

Re: [PATCH] match.pd, expand: Fold VCE from integer with [0, 1] range to bool into NOP_EXPR [PR80635]

2021-02-24 Thread Martin Sebor via Gcc-patches

On 2/24/21 5:13 AM, Jakub Jelinek via Gcc-patches wrote:

On Wed, Feb 24, 2021 at 11:50:10AM +0100, Richard Biener wrote:

In the PR using NOP_EXPR has been discussed as one possibility and has been
rejected because at expansion it will emit a superfluous & 1 operation.
I still think it is a good idea to use NOP_EXPR and so have changed
expansion to not emit that & 1 operation in that case.  Both spots are
done with tight conditions (bool only etc.), as I'd like to fix just
that case and not introduce a wider general optimization, but perhaps
later we could lift it and do a general range of arbitrary
type_has_mode_precision_p to non-type_has_mode_precision_p with same
TYPE_MODE case.


But it still is a pessimization.  VCE says there's no code to be
generated but NOP_EXPR says there is a conversion involved, even
if you later elide it via ssa_name_has_boolean_range.


I'm not convinced it is a pessimization, because a NOP_EXPR is
something the compiler can optimize orders of magnitude better than
a VCE.
To back that up by some random numbers,
grep CASE_CONVERT: gimple-match.c | wc -l; grep VIEW_CONVERT_EXPR: 
gimple-match.c | wc -l
417
18


So I wonder what other optimizations are prevented here?



Why does uninit warn with VCE but not with NOP_EXPR?  Or does the
warning disappear because of those other optimizations you mention?


Can you comment on Jeff's POC patch in the PR?  Would it make sense
to apply it (with adjustments if necessary) as well to make the warning
more robust in case the VCE comes back?

Martin



The optimization that it prevents is in this particular case in tree-vrp.c
(vrp_simplify_cond_using_ranges):

   if (!is_gimple_assign (def_stmt)
   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
 return;
so it punts on VIEW_CONVERT_EXPR, with NOP_EXPR it optimizes that:
   _9 = (bool) maybe_a$4_7;
   if (_9 != 0)
into:
   _9 = (bool) maybe_a$4_7;
   if (maybe_a$4_7 != 0)

Now, if I apply my patch but manually disable this
vrp_simplify_cond_using_ranges optimization, then the uninit warning is
back, so on the uninit side it is not about VIEW_CONVERT_EXPR vs. NOP_EXPR,
both are bad there, uninit wants the guarding condition to be
that SSA_NAME and not some demotion cast thereof.
We have:
   # maybe_a$m_6 = PHI <_5(4), maybe_a$m_4(D)(6)>
   # maybe_a$4_7 = PHI <1(4), 0(6)>
...
One of:
   _9 = VIEW_CONVERT_EXPR<bool>(maybe_a$4_7);
   if (_9 != 0)
or:
   _9 = (bool) maybe_a$4_7;
   if (_9 != 0)
or:
   if (maybe_a$4_7 != 0)
followed by:
 goto ; [0.00%]
   else
 goto ; [0.00%]
...
[count: 0]:
   set (maybe_a$m_6);
and uninit wants to see that maybe_a$m_4(D) is not used if
bb 11 is encountered.

So, if you are strongly opposed to the posted patch, I guess the fix can be
(at least fixes the testcase; completely untested except for
make check-c++-all RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
dg.exp=pr80635*.C'
) following.
But, I fear there will be dozens of other spots where we'll punt on
optimizing when it is a VCE rather than NOP_EXPR.

2021-02-24  Jakub Jelinek  

PR tree-optimization/80635
* tree-vrp.c (vrp_simplify_cond_using_ranges): Also handle
VIEW_CONVERT_EXPR if modes are the same, innerop is integral and
has mode precision.

* g++.dg/warn/pr80635-1.C: New test.
* g++.dg/warn/pr80635-2.C: New test.

--- gcc/tree-vrp.c.jj   2021-02-24 12:56:58.573939572 +0100
+++ gcc/tree-vrp.c  2021-02-24 13:05:22.675326780 +0100
@@ -4390,11 +4390,24 @@ vrp_simplify_cond_using_ranges (vr_value
gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
tree innerop;
  
-  if (!is_gimple_assign (def_stmt)

- || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
+  if (!is_gimple_assign (def_stmt))
return;
  
-  innerop = gimple_assign_rhs1 (def_stmt);

+  switch (gimple_assign_rhs_code (def_stmt))
+   {
+   CASE_CONVERT:
+ innerop = gimple_assign_rhs1 (def_stmt);
+ break;
+   case VIEW_CONVERT_EXPR:
+ innerop = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+ if (TYPE_MODE (TREE_TYPE (op0)) != TYPE_MODE (TREE_TYPE (innerop))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (innerop))
+ || !type_has_mode_precision_p (TREE_TYPE (innerop)))
+   return;
+ break;
+   default:
+ break;
+   }
  
if (TREE_CODE (innerop) == SSA_NAME

  && !POINTER_TYPE_P (TREE_TYPE (innerop))
--- gcc/testsuite/g++.dg/warn/pr80635-1.C.jj2021-02-24 12:24:15.176834532 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-1.C   2021-02-24 12:24:15.176834532 
+0100
@@ -0,0 +1,46 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+using size_t = decltype (sizeof (1));
+inline void *operator new (size_t, void *p) { return p; }
+template
+struct optional
+{
+  optional () : m_dummy (), live (false) {}
+  void emplace () { new (_item) T 

Re: [PATCH, V2] Add conversions between _Float128 and Decimal.

2021-02-24 Thread Segher Boessenkool
Hi!

On Wed, Feb 24, 2021 at 11:12:54PM +0000, Joseph Myers wrote:
> This change appears to have broken builds for powerpc in a configuration 
> that bootstraps a cross toolchain starting with a GCC build with no libc 
> available.
> 
> Specifically, such a bootstrap build uses --disable-decimal-float among 
> other options (in the first GCC build before libc has been built), to 
> disable GCC target library code that has any dependence on libc or libc 
> headers - dfp-bit.c uses libc headers, without an inhibit_libc 
> conditional, so cannot be used in such a bootstrap configuration.  Most of 
> the DFP code in libgcc is disabled by --disable-decimal-float, but it 
> seems the new conversions are not.

> The appropriate fix is not to build any of these new conversions in the 
> --disable-decimal-float case.  (That clearly makes sense anyway, even in 
> the absence of the bootstrap issue.)

Certainly.

The other option, which we need for correctness *anyway*, is to have
libgcc do the conversion without using any specific libc.


Segher


Re: PING [PATCH] avoid -Warray-bounds checks for vtable assignments (PR 98266)

2021-02-24 Thread Jason Merrill via Gcc-patches

On 2/23/21 6:07 PM, Martin Sebor wrote:

On 2/23/21 2:52 PM, Jason Merrill wrote:

On 2/23/21 11:02 AM, Martin Sebor wrote:

[CC Jason for any further comments/clarification]

On 2/9/21 10:49 AM, Martin Sebor wrote:

On 2/8/21 4:11 PM, Jeff Law wrote:



On 2/8/21 3:44 PM, Martin Sebor wrote:

On 2/8/21 3:26 PM, Jeff Law wrote:



On 2/8/21 2:56 PM, Martin Sebor wrote:

On 2/8/21 12:59 PM, Jeff Law wrote:



On 1/19/21 5:56 PM, Martin Sebor via Gcc-patches wrote:
Similar to the problem reported for -Wstringop-overflow in pr98266
and already fixed, -Warray-bounds is also susceptible to false
positives in assignments and copies involving virtual inheritance.
Because the two warnings don't share code yet (hopefully in GCC 12)
the attached patch adds its own workaround for this problem to
gimple-array-bounds.cc, this one slightly more crude because of
the limited insight the array bounds checking has into the checked
expressions.

Tested on x86_64-linux.

Martin

gcc-98266.diff

PR middle-end/98266 - bogus array subscript is partly outside array
bounds on virtual inheritance

gcc/ChangeLog:

  PR middle-end/98266
  * gimple-array-bounds.cc
(array_bounds_checker::check_array_bounds):
  Avoid checking references involving artificial members.

gcc/testsuite/ChangeLog:

  PR middle-end/98266
  * g++.dg/warn/Warray-bounds-15.C: New test.

It seems to me that we've got the full statement at some point and
thus the full expression, so at some point couldn't we detect when
TYPE_SIZE_UNIT != DECL_SIZE_UNIT?  Or should we be using TYPE_SIZE_UNIT
rather than DECL_SIZE_UNIT in gimple-array-bounds.cc?

Am I missing something?


The expression we're looking at when the false positive is issued
is the MEM_REF in the LHS of:

MEM[(struct D *) + 24B]._vptr.D =   [(void
*)&_ZTC1E24_1D + 24B];

TREE_TYPE(LHS) is D, DECL_SIZE_UNIT (D.2652) is 24, and
TYPE_SIZE_UNIT(D) is also 24, so there's no discrepancy between
DECL_SIZE and TYPE_SIZE.

So that seems like it's a different issue then, unrelated to 97595.
Right?


I think the underlying problem is the same.  We're getting a size
that doesn't correspond to what's actually being accessed, and it
happens because of the virtual inheritance.  In pr97595 Jason
suggested to use the decl/type size inequality to identify this
case but I think we could have just as well used DECL_ARTIFICIAL
instead.  At least the test cases from pr97595 both pass with
this change.

But in the 98266 case the type and decl sizes are the same.  So to be
true that would mean that the underlying type we're using to access
memory differs from its actual type.  Is that the case in the IL?  And
does this have wider implications for diagnostics or optimizations
that rely on accurate type sizing?

I'm just trying to make sure I understand, not accepting or rejecting
the patch yet.


The part of the IL with the MEM_REF is this:

void g ()
{
   void * D.2789;
   struct E D.2652;

    [local count: 1073741824]:
   E::E (, "");
   f ();

    [local count: 1073741824]:
   MEM[(struct D *) + 24B]._vptr.D =   [(void 
*)&_ZTC1E24_1D + 24B];

   ...

The access here is to the _vptr.D pointer member of D.2652 which is
just past the end of the parent object (as reflected by its SIZE):
it sets up the virtual table pointer.

The access in pr97595 is to the member subobject, which, as Jason
explained (and I accordingly documented under DECL_SIZE in tree.h),
is also laid out separately from the parent object.

These cases aren't exactly the same (which is also why the test
I added for -Warray-bounds in pr97595 didn't expose this bug) but
they are closely related.  The one here can be distinguished by
DECL_ARTIFICIAL.  The other by the DECL_SIZE != TYPE_SIZE member
inequality.

Might this impact other warnings?  I'd say so if they don't take
these things into account.  I just learned about this in pr97595
which was a -Wstringop-overflow false positive but I also saw
a similar instance of -Warray-bounds with my patch to improve
caching and enhance array bounds checking.  I dealt with that
instance of the warning in that patch but proactively added
a test case to the fix for pr97595.  But the test case is focused
on the subobject access and not on one to the virtual table so
(as I said above) it didn't expose this bug.

Might this also impact optimizations?  I can imagine someone
unaware of this "gotcha" making the same "naive" assumption
I did, but I'd also expect such an invalid assumption to be
found either in code review or quickly cause problems in
testing.


Jeff, does this answer your question?


I don't see how the issue here depends on the artificiality of the 
vptr; I'd expect to see the same problem with a data member.  The 
problem is that a D base subobject is smaller than a complete D 
object, and in this case the base subobject is allocated such that if 
it were a full D object, it would overlap the end of E.  And we're 
checking the MEM_REF as though accessing a full D object, so 

Re: [PATCH] c++: ICE with deduction guide in checking type-dep [PR99009, PR97034]

2021-02-24 Thread Marek Polacek via Gcc-patches
Ping.

On Fri, Feb 12, 2021 at 06:12:12PM -0500, Marek Polacek via Gcc-patches wrote:
> We represent deduction guides with FUNCTION_DECLs, but they are built
> without DECL_CONTEXT, leading to an ICE in type_dependent_expression_p
> on the assert that the type of a function template with no dependent
> (innermost!) template arguments must be non-dependent.  Consider the
> attached class-deduction79.C: we create a deduction guide:
> 
>   template G(T)-> E::G
> 
> we deduce T and create a partial instantiation:
> 
>   G(T) -> E::G [with T = int]
> 
> And then do_class_deduction wants to create a CALL_EXPR from the above
> using build_new_function_call -> build_over_call which calls mark_used
> -> maybe_instantiate_noexcept -> type_dependent_expression_p.
> 
> There, the innermost template arguments are non-dependent (), but
> the fntype is dependent -- the return type is a TYPENAME_TYPE, and
> since we have no DECL_CONTEXT, this check holds:
> 
>   /* Otherwise, if the function decl isn't from a dependent scope, it can't be
>  type-dependent.  Checking this is important for functions with auto 
> return
>  type, which looks like a dependent type.  */
>   if (TREE_CODE (expression) == FUNCTION_DECL
>   && !(DECL_CLASS_SCOPE_P (expression)
>&& dependent_type_p (DECL_CONTEXT (expression)))
> 
> whereupon we ICE.
> 
> Experiments with setting DECL_CONTEXT didn't pan out.  So perhaps we
> just want to skip the assert for deduction guides, because they are
> a little special.  Better ideas solicited.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu.
> 
> gcc/cp/ChangeLog:
> 
>   PR c++/97034
>   PR c++/99009
>   * pt.c (type_dependent_expression_p): Don't assert that the type
>   of a deduction guide must be non-dependent.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c++/97034
>   PR c++/99009
>   * g++.dg/cpp1z/class-deduction79.C: New test.
>   * g++.dg/cpp1z/class-deduction80.C: New test.
>   * g++.dg/cpp2a/class-deduction-aggr8.C: New test.
>   * g++.dg/cpp2a/class-deduction-aggr9.C: New test.
> ---
>  gcc/cp/pt.c   |  5 -
>  .../g++.dg/cpp1z/class-deduction79.C  | 20 +++
>  .../g++.dg/cpp1z/class-deduction80.C  | 12 +++
>  .../g++.dg/cpp2a/class-deduction-aggr8.C  | 19 ++
>  .../g++.dg/cpp2a/class-deduction-aggr9.C  | 18 +
>  5 files changed, 73 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction79.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction80.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/class-deduction-aggr8.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/class-deduction-aggr9.C
> 
> diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> index 60de8e93ff7..3e6f3407d40 100644
> --- a/gcc/cp/pt.c
> +++ b/gcc/cp/pt.c
> @@ -27282,7 +27282,10 @@ type_dependent_expression_p (tree expression)
>  && DECL_UNIQUE_FRIEND_P (expression)
>  && (!DECL_FRIEND_CONTEXT (expression)
>  || dependent_type_p (DECL_FRIEND_CONTEXT (expression
> -  && !DECL_LOCAL_DECL_P (expression))
> +  && !DECL_LOCAL_DECL_P (expression)
> +  /* We build deduction guides without any DECL_CONTEXT, but they can
> +  be type-dependent.  */
> +  && !deduction_guide_p (expression))
>  {
>gcc_assert (!dependent_type_p (TREE_TYPE (expression))
> || undeduced_auto_decl (expression));
> diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction79.C 
> b/gcc/testsuite/g++.dg/cpp1z/class-deduction79.C
> new file mode 100644
> index 000..86a68248157
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction79.C
> @@ -0,0 +1,20 @@
> +// PR c++/97034
> +// { dg-do compile { target c++17 } }
> +
> +template 
> +struct E {
> +  template 
> +  struct G {
> +T t;
> +G(T) { }
> +  };
> +
> +  void fn() { G{1}; }
> +};
> +
> +void
> +g ()
> +{
> +  E e;
> +  e.fn ();
> +}
> diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction80.C 
> b/gcc/testsuite/g++.dg/cpp1z/class-deduction80.C
> new file mode 100644
> index 000..238024c508f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction80.C
> @@ -0,0 +1,12 @@
> +// PR c++/99009
> +// { dg-do compile { target c++17 } }
> +
> +template struct B {
> +  B(int = A()) {}
> +  template  struct A;
> +};
> +
> +template struct X {
> +  template  struct Y;
> +  X() { Y y; };
> +};
> diff --git a/gcc/testsuite/g++.dg/cpp2a/class-deduction-aggr8.C 
> b/gcc/testsuite/g++.dg/cpp2a/class-deduction-aggr8.C
> new file mode 100644
> index 000..399061100ae
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/class-deduction-aggr8.C
> @@ -0,0 +1,19 @@
> +// PR c++/97034
> +// { dg-do compile { target c++20 } }
> +
> +namespace N {
> +template  struct S {
> +  template  S(T, U);
> +};
> +} // namespace N
> +template  struct E {
> +  template  struct G { T t; };
> +  void fn() { 

Re: [PATCH RFA] cgraph: flatten and same_body aliases [PR96078]

2021-02-24 Thread Jason Merrill via Gcc-patches

On 2/16/21 1:52 PM, Jeff Law wrote:



On 2/11/21 10:18 PM, Jason Merrill via Gcc-patches wrote:

The patch for PR92372 made us start warning about a flatten attribute on an
alias.  But in the case of C++ 'tor base/complete variants, the user didn't
create the alias, so we shouldn't warn.

I could also remove the attribute in maybe_clone_body, but here seems a bit
better.

Tested x86_64-pc-linux-gnu.  OK for trunk?

gcc/ChangeLog:

PR c++/96078
* cgraph.c (cgraph_node::create_same_body_alias): Remove flatten
attribute from alias.

gcc/testsuite/ChangeLog:

PR c++/96078
* g++.dg/ext/attr-flatten1.C: New test.

But shouldn't we validate that we've got a C++ ctor/dtor rather than
blindly removing the attribute from all aliases?  ISTM like the patch
as-is would always suppress warnings when we create a same-body alias
regardless of why we created a same-body alias.


Fair enough.  How about this approach, instead?  If the target also has 
attribute flatten, we're getting the desired effect even though the 
called symbol is an alias.
From 740414f268382625525a892fd14357f694ca4391 Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Thu, 11 Feb 2021 22:01:19 -0500
Subject: [PATCH] cgraph: flatten and same_body aliases [PR96078]
To: gcc-patches@gcc.gnu.org

The patch for PR92372 made us start warning about a flatten attribute on an
alias.  But in the case of C++ 'tor base/complete variants, the user didn't
create the alias.  If the alias target also has the attribute, the alias
points to a flattened function, so we shouldn't warn.

gcc/ChangeLog:

	PR c++/96078
	* cgraphunit.c (process_function_and_variable_attributes): Don't
	warn about flatten on an alias if the target also has it.
	* cgraph.h (symtab_node::get_alias_target_tree): New.

gcc/testsuite/ChangeLog:

	PR c++/96078
	* g++.dg/ext/attr-flatten1.C: New test.
---
 gcc/cgraph.h | 14 ++
 gcc/cgraphunit.c |  7 +--
 gcc/testsuite/g++.dg/ext/attr-flatten1.C |  9 +
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-flatten1.C

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47b5c02d770..4a1f89920f5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -319,6 +319,9 @@ public:
   /* Return node that alias is aliasing.  */
   inline symtab_node *get_alias_target (void);
 
+  /* Return DECL that alias is aliasing.  */
+  inline tree get_alias_target_tree ();
+
   /* Set section for symbol and its aliases.  */
   void set_section (const char *section);
 
@@ -2665,6 +2668,17 @@ symtab_node::get_alias_target (void)
   return ref->referred;
 }
 
+/* Return the DECL (or identifier) that alias is aliasing.  Unlike the above,
+   this works whether or not the alias has been analyzed already.  */
+
+inline tree
+symtab_node::get_alias_target_tree ()
+{
+  if (alias_target)
+return alias_target;
+  return get_alias_target ()->decl;
+}
+
 /* Return next reachable static symbol with initializer after the node.  */
 
 inline symtab_node *
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index b401f0817a3..1c74cee69ac 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -859,8 +859,11 @@ process_function_and_variable_attributes (cgraph_node *first,
   if (node->alias
 	  && lookup_attribute ("flatten", DECL_ATTRIBUTES (decl)))
 	{
-	  warning_at (DECL_SOURCE_LOCATION (node->decl), OPT_Wattributes,
-		  "% attribute is ignored on aliases");
+	  tree tdecl = node->get_alias_target_tree ();
+	  if (!tdecl || !DECL_P (tdecl)
+	  || !lookup_attribute ("flatten", DECL_ATTRIBUTES (tdecl)))
+	warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wattributes,
+			"% attribute is ignored on aliases");
 	}
   if (DECL_PRESERVE_P (decl))
 	node->mark_force_output ();
diff --git a/gcc/testsuite/g++.dg/ext/attr-flatten1.C b/gcc/testsuite/g++.dg/ext/attr-flatten1.C
new file mode 100644
index 000..5bcbfb6f4aa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/attr-flatten1.C
@@ -0,0 +1,9 @@
+// PR c++/96078
+// { dg-do compile { target c++11 } }
+
+struct A {
+[[gnu::flatten]] A() {}
+[[gnu::flatten]] ~A() {}
+};
+
+A a;
-- 
2.27.0



Re: [PATCH, V2] Add conversions between _Float128 and Decimal.

2021-02-24 Thread Joseph Myers
This change appears to have broken builds for powerpc in a configuration 
that bootstraps a cross toolchain starting with a GCC build with no libc 
available.

Specifically, such a bootstrap build uses --disable-decimal-float among 
other options (in the first GCC build before libc has been built), to 
disable GCC target library code that has any dependence on libc or libc 
headers - dfp-bit.c uses libc headers, without an inhibit_libc 
conditional, so cannot be used in such a bootstrap configuration.  Most of 
the DFP code in libgcc is disabled by --disable-decimal-float, but it 
seems the new conversions are not.  This results in errors of the form:

In file included from 
/scratch/jmyers/glibc-bot/src/gcc/libgcc/config/rs6000/_kf_to_sd.c:37:
/scratch/jmyers/glibc-bot/src/gcc/libgcc/dfp-bit.c:32:10: fatal error: stdio.h: 
No such file or directory
   32 | #include <stdio.h>
  |  ^
compilation terminated.

The appropriate fix is not to build any of these new conversions in the 
--disable-decimal-float case.  (That clearly makes sense anyway, even in 
the absence of the bootstrap issue.)

https://sourceware.org/pipermail/libc-testresults/2021q1/007576.html

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH,rs6000] [v2] Optimize pcrel access of globals

2021-02-24 Thread Segher Boessenkool
On Mon, Feb 22, 2021 at 09:24:04PM -0600, acsaw...@linux.ibm.com wrote:
> This patch implements a RTL pass that looks for pc-relative loads of the
> address of an external variable using the PCREL_GOT relocation and a
> single load or store that uses that external address.
> 
> Produced by a cast of thousands:
>  * Michael Meissner
>  * Peter Bergner
>  * Bill Schmidt
>  * Alan Modra
>  * Segher Boessenkool
>  * Aaron Sawdey
> 
> This incorporates the changes requested in Segher's review. A few things I
> did not change were the insn-at-a-time scan that could be done with DF,

Yes, that would be a bigger (and possibly destabilising) change.

> and
> I did not change to using statistics.[ch] for the counters struct.

Okay.

> I did try
> to improve the naming, and rewrote a number of comments to make them 
> consistent
> with the code, and generally tried to make things more readable.

Great, thank you!

> +(define_insn "*pcrel_opt_ld_gpr"
> +  [(parallel [(set (match_operand:PCRELOPT_GPR 0 "int_reg_operand" "+r")
> +(unspec:PCRELOPT_GPR [(match_operand:PCRELOPT_GPR 1 
> "d_form_memory" "m")

That last line is a bit long.  One thing you can do is break after the [
character (and then indent two chars).  It's a bit ugly, but if you see
no better alternative :-)

> +  return which_alternative ? " %0,%1" : " 
> %0,%1";

Too long as well (but this is easy to break up, and that might be a good
idea even if it wasn't a too long line).

> +  switch (GET_CODE (mem))
> + {
> + case BSWAP:/* LFIWAX/LFIWZX/STFIWX.  */
> + case UNSPEC:
> + case UNSPEC_VOLATILE:   /* Leave this alone for obvious reasons.  */
> + case ROTATE:   /* lxvd2x.  */
> + case VEC_SELECT:
> +   return NULL_RTX;
> + default: ;
> + }
> +  if (GET_RTX_CLASS (GET_CODE (mem)) != RTX_UNARY)
> + return NULL_RTX;

All of the above except BSWAP are matched by this class test.  So I
would suggest writing it as

  if (GET_RTX_CLASS (GET_CODE (mem)) != RTX_UNARY
  || GET_CODE (mem) == BSWAP)
return 0;

> +  /* Rule out LFIWAX/LFIWZX/STFIWX.  */
> +  if (GET_CODE (mem) == BSWAP)
> + return NULL_RTX;

This is already handled.

> +static bool
> +insn_references_regno_p (rtx_insn *insn, unsigned int regno,
> +enum attr_type type)
> +{
> +  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
> +  /* If we don't have the insn_info for some reason, do not attempt to 
> optimize
> + this reference.  */
> +  if (!insn_info)
> +return true;

It is better to make this an assert, and explicitly handle the cases
that legitimately have no insn_info (if any).

> @@ -26213,11 +26291,34 @@ void
>  rs6000_asm_output_opcode (FILE *stream)
>  {
>if (next_insn_prefixed_p)
> -fprintf (stream, "p");
> +{
> +  fprintf (stream, "p");
> +
> +  /* Reset the flag in the case where there are separate insn lines in 
> the
> +  sequence, so the 'p' is only emitted for the first line.  This shows up
> +  when we are doing the PCREL_OPT optimization, in that the label created
> +  with %r would have a leading 'p' printed.  */
> +  next_insn_prefixed_p = false;
> +}

So this part needs the symbol name changed, to say what this variable
*is*.  The other patch did that, the change will be trivial.

Other than that, this is all completely trivial.  It is in good enough
shape to merge now.  Thank you!  Let's see if Richi allows it into
mainline.


Segher


[PATCH] c++: const_cast of null pointer in constant expr [PR99176]

2021-02-24 Thread Marek Polacek via Gcc-patches
Here we reject

  constexpr const int *p = nullptr;
  constexpr int *q = const_cast<int *>(p);

with "conversion of 'const int*' null pointer to 'int*' is not a
constant expression", which seems bogus.  This code has been rejected
since r238909 which added the can_convert check when converting a null
pointer.  I'm not finding any standard rule that this check was supposed
to enforce.  The original discussion was here

and here
.

Since can_convert never assumes a C-style cast, it rejects casting
away constness as in the test above and in:

  constexpr int *q = (int *)(const int *) nullptr;

Removing the check only breaks constexpr-nullptr-2.C by not giving any
diagnostic for line 229:

  constexpr B *pb2 = static_cast<B*>(pa0);  // { dg-error "not a constant 
expression" }

but the cast seems to be valid: we do [expr.static.cast]/7, and
[expr.const] only says that a reinterpret_cast and converting from
void* is invalid in constexpr.  The can_convert check rejected converting
from void *, but only when converting from a null pointer, so it's not
good enough.  So I've added a check to catch conversions from cv void*.
I realize it's not a great time to be adding additional checking, but
removing the can_convert check would then technically be a regression.
(I could perhaps limit the new check to only trigger for integer_zerop
and then remove it in GCC 12.)

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

DR 1312
PR c++/99176
* constexpr.c (cxx_eval_constant_expression): Reject casting
from void * as per DR 1312.  Don't check can_convert.

gcc/testsuite/ChangeLog:

DR 1312
PR c++/99176
* g++.dg/cpp0x/constexpr-nullptr-2.C: Adjust dg-error.
* g++.dg/cpp0x/constexpr-cast2.C: New test.
* g++.dg/cpp0x/constexpr-cast3.C: New test.
---
 gcc/cp/constexpr.c| 49 ---
 gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C  | 16 ++
 gcc/testsuite/g++.dg/cpp0x/constexpr-cast3.C  | 14 ++
 .../g++.dg/cpp0x/constexpr-nullptr-2.C|  4 +-
 4 files changed, 64 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-cast2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-cast3.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 377fe322ee8..adf575d3dc6 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -6653,6 +6653,37 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
return t;
  }
 
+   /* [expr.const]: a conversion from type cv void* to a pointer-to-object
+  type cannot be part of a core constant expression as a resolution to
+  DR 1312.  */
+   if (TYPE_PTROB_P (type)
+   && TYPE_PTR_P (TREE_TYPE (op))
+   && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (op)))
+   /* Inside a call to std::construct_at or to
+  std::allocator::{,de}allocate, we permit casting from void*
+  because that is compiler-generated code.  */
+   && !(ctx->call
+&& ctx->call->fundef
+&& (is_std_construct_at (ctx->call->fundef->decl)
+|| is_std_allocator_allocate (ctx->call->fundef->decl
+ {
+   /* Likewise, don't error when casting from void* when OP is
+   uninit and similar.  */
+   tree sop = tree_strip_nop_conversions (op);
+   if (TREE_CODE (sop) == ADDR_EXPR
+   && VAR_P (TREE_OPERAND (sop, 0))
+   && DECL_ARTIFICIAL (TREE_OPERAND (sop, 0)))
+ /* OK */;
+   else
+ {
+   if (!ctx->quiet)
+ error_at (loc, "cast from %qT is not allowed",
+   TREE_TYPE (op));
+   *non_constant_p = true;
+   return t;
+ }
+ }
+
if (TREE_CODE (op) == PTRMEM_CST && !TYPE_PTRMEM_P (type))
  op = cplus_expand_constant (op);
 
@@ -6671,26 +6702,10 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
if (TYPE_REF_P (type))
  {
if (!ctx->quiet)
- error_at (loc,
-   "dereferencing a null pointer");
+ error_at (loc, "dereferencing a null pointer");
*non_constant_p = true;
return t;
  }
-   else if (TYPE_PTR_P (TREE_TYPE (op)))
- {
-   tree from = TREE_TYPE (op);
-
-   if (!can_convert (type, from, tf_none))
- {
-   if (!ctx->quiet)
- error_at (loc,
-   "conversion of %qT null pointer to %qT "
-   "is not a 

Re: [committed] libstdc++: Define std::to_chars overloads for __ieee128 [PR 98389]

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 24/02/21 17:00 +, Jonathan Wakely via Libstdc++ wrote:

@@ -815,6 +852,39 @@ template
return result;
  }

+namespace
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wabi"
+  template<typename T, typename... Extra>
+  inline int
+  sprintf_ld(char* buffer, const char* format_string, T value, Extra... args)
+  {
+int len;
+
+#if _GLIBCXX_USE_C99_FENV_TR1 && defined(FE_TONEAREST)
+const int saved_rounding_mode = fegetround();
+if (saved_rounding_mode != FE_TONEAREST)
+  fesetround(FE_TONEAREST); // We want round-to-nearest behavior.
+#endif
+
+#ifdef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
+if constexpr (is_same_v<T, __ieee128>)
+  len = __sprintfieee128(buffer, format_string, value, args...);
+else
+#endif
+len = sprintf(buffer, format_string, value, args...);


I messed up the order of these arguments here, which caused PR 99261.

Fixed with this patch, tested powerpc64*-linux and x86_64-linux.
Committed to trunk.


commit 94bfe81afedb6dbba877ee7c9f047375366f8996
Author: Jonathan Wakely 
Date:   Wed Feb 24 22:25:31 2021

libstdc++: Fix order of arguments to sprintf [PR 99261]

libstdc++-v3/ChangeLog:

PR libstdc++/99261
* src/c++17/floating_to_chars.cc (sprintf_ld): Add extra args
before value to be printed.

diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc b/libstdc++-v3/src/c++17/floating_to_chars.cc
index f1512017aa3..611747bb99e 100644
--- a/libstdc++-v3/src/c++17/floating_to_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_to_chars.cc
@@ -870,10 +870,10 @@ namespace
 
 #ifdef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
 if constexpr (is_same_v<T, __ieee128>)
-  len = __sprintfieee128(buffer, format_string, value, args...);
+  len = __sprintfieee128(buffer, format_string, args..., value);
 else
 #endif
-len = sprintf(buffer, format_string, value, args...);
+len = sprintf(buffer, format_string, args..., value);
 
 #if _GLIBCXX_USE_C99_FENV_TR1 && defined(FE_TONEAREST)
 if (saved_rounding_mode != FE_TONEAREST)


[PATCH] avoid -Wnonnull for dynamic_cast (PR 99251)

2021-02-24 Thread Martin Sebor via Gcc-patches

In r11-6900 (PR 98646 - static_cast confuses -Wnonnull) we decided
that issuing -Wnonnull for dereferencing the result of dynamic_cast
was helpful despite the false positives it causes when the pointer
is guaranteed not to be null because of a prior test.

The test case in PR 99251 along with the feedback I got from Martin
Liska have convinced me it was not the right decision.

The attached patch arranges for dynamic_cast to also suppress -Wnonnull
analogously to static_cast.  Since there already is a helper function
that builds the if-not-null test (ifnonnull) and sets TREE_NO_WARNING,
I factored out the corresponding code from build_base_path that sets
the additional TREE_NO_WARNING bit for static_cast into the function
and called it from both places.  I also renamed the function to make
its purpose clearer and for consistency with other build_xxx APIs.

Tested on x86_64-linux.

Martin
PR c++/99251 - inconsistent -Wnonnull warning behaviour with dynamic_cast

gcc/cp/ChangeLog:

	PR c++/99251
	* class.c (build_base_path): Call build_nonnull_test.
	* cp-tree.h (build_nonnull_test): Declare.
	* rtti.c (ifnonnull): Rename...
	(build_nonnull_test): ...to this.  Set no-warning bit on COND_EXPR.
	(build_dynamic_cast_1): Adjust to name change.

gcc/testsuite/ChangeLog:

	PR c++/99251
	* g++.dg/warn/Wnonnull9.C: Expect no warnings.
	* g++.dg/warn/Wnonnull12.C: New test.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 40f5fef7baa..6c6e0564bf9 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -402,16 +402,9 @@ build_base_path (enum tree_code code,
   if (TREE_SIDE_EFFECTS (expr) && (null_test || virtual_access))
 expr = save_expr (expr);
 
-  /* Now that we've saved expr, build the real null test.  */
+  /* Store EXPR and build the real null test just before returning.  */
   if (null_test)
-{
-  tree zero = cp_convert (TREE_TYPE (expr), nullptr_node, complain);
-  null_test = build2_loc (input_location, NE_EXPR, boolean_type_node,
-			  expr, zero);
-  /* This is a compiler generated comparison, don't emit
-	 e.g. -Wnonnull-compare warning for it.  */
-  TREE_NO_WARNING (null_test) = 1;
-}
+null_test = expr;
 
   /* If this is a simple base reference, express it as a COMPONENT_REF.  */
   if (code == PLUS_EXPR && !virtual_access
@@ -516,14 +509,8 @@ build_base_path (enum tree_code code,
 
  out:
   if (null_test)
-{
-  expr = fold_build3_loc (input_location, COND_EXPR, target_type, null_test,
-			  expr, build_zero_cst (target_type));
-  /* Avoid warning for the whole conditional expression (in addition
-	 to NULL_TEST itself -- see above) in case the result is used in
-	 a nonnull context that the front end -Wnonnull checks.  */
-  TREE_NO_WARNING (expr) = 1;
-}
+/* Wrap EXPR in a null test.  */
+expr = build_nonnull_test (null_test, expr, complain);
 
   return expr;
 }
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 38b31e3908f..8c6cda8d1a6 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7271,6 +7271,7 @@ extern void emit_support_tinfos			(void);
 extern bool emit_tinfo_decl			(tree);
 extern unsigned get_pseudo_tinfo_index		(tree);
 extern tree get_pseudo_tinfo_type		(unsigned);
+extern tree build_nonnull_test			(tree, tree, tsubst_flags_t);
 
 /* in search.c */
 extern tree get_parent_with_private_access 	(tree decl, tree binfo);
diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c
index b41d95469c6..84482743392 100644
--- a/gcc/cp/rtti.c
+++ b/gcc/cp/rtti.c
@@ -121,7 +121,6 @@ vec *unemitted_tinfo_decls;
and are generated as needed. */
 static GTY (()) vec *tinfo_descs;
 
-static tree ifnonnull (tree, tree, tsubst_flags_t);
 static tree tinfo_name (tree, bool);
 static tree build_dynamic_cast_1 (location_t, tree, tree, tsubst_flags_t);
 static tree throw_bad_cast (void);
@@ -529,16 +528,23 @@ get_typeid (tree type, tsubst_flags_t complain)
 /* Check whether TEST is null before returning RESULT.  If TEST is used in
RESULT, it must have previously had a save_expr applied to it.  */
 
-static tree
-ifnonnull (tree test, tree result, tsubst_flags_t complain)
+tree
+build_nonnull_test (tree test, tree result, tsubst_flags_t complain)
 {
-  tree cond = build2 (NE_EXPR, boolean_type_node, test,
-		  cp_convert (TREE_TYPE (test), nullptr_node, complain));
+  tree null_ptr = cp_convert (TREE_TYPE (test), nullptr_node, complain);
+  tree cond = build2 (NE_EXPR, boolean_type_node, test, null_ptr);
+
   /* This is a compiler generated comparison, don't emit
  e.g. -Wnonnull-compare warning for it.  */
   TREE_NO_WARNING (cond) = 1;
-  return build3 (COND_EXPR, TREE_TYPE (result), cond, result,
-		 cp_convert (TREE_TYPE (result), nullptr_node, complain));
+
+  null_ptr = cp_convert (TREE_TYPE (result), nullptr_node, complain);
+  cond = build3 (COND_EXPR, TREE_TYPE (result), cond, result, null_ptr);
+
+  /* Likewise, don't emit -Wnonnull for using the result to call
+ a member function.  */
+  TREE_NO_WARNING (cond) = 1;

Re: [PATCH] libstdc++: Fix __floating_to_chars_precision for __float128

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 24/02/21 17:14 -0500, Patrick Palka via Libstdc++ wrote:

The code path in __floating_to_chars_precision for handling long double
by going through printf now also handles __float128, so the condition
that guards this code path needs to be updated accordingly.

Tested on x86_64-pc-linux-gnu (i.e. it compiles :)), does this look OK
for trunk?


Yes, Thanks.


libstdc++-v3/ChangeLog:

* src/c++17/floating_to_chars.cc (__floating_to_chars_precision):
Relax the condition that guards the printf code path to accept
F128_type as well.
---
libstdc++-v3/src/c++17/floating_to_chars.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc 
b/libstdc++-v3/src/c++17/floating_to_chars.cc
index aea96e08df1..f1512017aa3 100644
--- a/libstdc++-v3/src/c++17/floating_to_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_to_chars.cc
@@ -1151,7 +1151,7 @@ template

// Ryu doesn't support formatting floating-point types larger than double
// with an explicit precision, so instead we just go through printf.
-if constexpr (is_same_v<T, long double>)
+if constexpr (is_same_v<T, long double> || is_same_v<T, F128_type>)
  {
int effective_precision;
const char* output_specifier;
--
2.30.1.602.g966e671106





[PATCH] libstdc++: Fix __floating_to_chars_precision for __float128

2021-02-24 Thread Patrick Palka via Gcc-patches
The code path in __floating_to_chars_precision for handling long double
by going through printf now also handles __float128, so the condition
that guards this code path needs to be updated accordingly.

Tested on x86_64-pc-linux-gnu (i.e. it compiles :)), does this look OK
for trunk?

libstdc++-v3/ChangeLog:

* src/c++17/floating_to_chars.cc (__floating_to_chars_precision):
Relax the condition that guards the printf code path to accept
F128_type as well.
---
 libstdc++-v3/src/c++17/floating_to_chars.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc 
b/libstdc++-v3/src/c++17/floating_to_chars.cc
index aea96e08df1..f1512017aa3 100644
--- a/libstdc++-v3/src/c++17/floating_to_chars.cc
+++ b/libstdc++-v3/src/c++17/floating_to_chars.cc
@@ -1151,7 +1151,7 @@ template
 
 // Ryu doesn't support formatting floating-point types larger than double
 // with an explicit precision, so instead we just go through printf.
-if constexpr (is_same_v<T, long double>)
+if constexpr (is_same_v<T, long double> || is_same_v<T, F128_type>)
   {
int effective_precision;
const char* output_specifier;
-- 
2.30.1.602.g966e671106



[PATCH 6/7] [og10] openacc: Fix lowering for derived-type mappings through array elements

2021-02-24 Thread Julian Brown
This patch fixes lowering of derived-type mappings which select elements
of arrays of derived types, and similar. These would previously lead
to ICEs.

With this change, OpenACC directives can pass through constructs that
are no longer recognized by the gimplifier, hence alterations are needed
there also.

gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Handle element selection
for arrays of derived types.

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Handle ATTACH_DETACH
for non-decls.

gcc/testsuite/
* gfortran.dg/goacc/array-with-dt-1.f90: New test.
* gfortran.dg/goacc/array-with-dt-3.f90: Likewise.
* gfortran.dg/goacc/array-with-dt-4.f90: Likewise.
* gfortran.dg/goacc/array-with-dt-5.f90: Likewise.
* gfortran.dg/goacc/derived-chartypes-1.f90: Re-enable test.
* gfortran.dg/goacc/derived-chartypes-2.f90: Likewise.
* gfortran.dg/goacc/derived-classtypes-1.f95: Uncomment
previously-broken directives.

libgomp/
* testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90: New test.
* testsuite/libgomp.oacc-fortran/update-dt-array.f90: Likewise.

(cherry picked from commit d28f3da11d8c0aed9b746689d723022a9b5ec04c)
---
 gcc/ChangeLog.omp |   7 +
 gcc/fortran/ChangeLog.omp |   7 +
 gcc/fortran/trans-openmp.c| 192 ++
 gcc/gimplify.c|  12 ++
 gcc/testsuite/ChangeLog.omp   |  13 ++
 .../gfortran.dg/goacc/array-with-dt-1.f90 |  11 +
 .../gfortran.dg/goacc/array-with-dt-3.f90 |  14 ++
 .../gfortran.dg/goacc/array-with-dt-4.f90 |  18 ++
 .../gfortran.dg/goacc/array-with-dt-5.f90 |  12 ++
 .../gfortran.dg/goacc/derived-chartypes-1.f90 |   3 -
 .../gfortran.dg/goacc/derived-chartypes-2.f90 |   3 -
 .../goacc/derived-classtypes-1.f95|   8 +-
 libgomp/ChangeLog.omp |   7 +
 .../derivedtypes-arrays-1.f90 | 109 ++
 .../libgomp.oacc-fortran/update-dt-array.f90  |  53 +
 15 files changed, 378 insertions(+), 91 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-5.f90
 create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/update-dt-array.f90

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index ba959fb37a4f..a59c25b79763 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * gimplify.c (gimplify_scan_omp_clauses): Handle ATTACH_DETACH
+   for non-decls.
+
 2021-02-16  Tobias Burnus  
 
* doc/invoke.texi (nvptx's -misa): Update default to sm_35.
diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index f99a11316f52..007855075563 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * trans-openmp.c (gfc_trans_omp_clauses): Handle element selection
+   for arrays of derived types.
+
 2021-02-24  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index d0e299b02142..e3df4bbf84ec 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -2660,6 +2660,32 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  tree decl = gfc_trans_omp_variable (n->sym, false);
  if (DECL_P (decl))
TREE_ADDRESSABLE (decl) = 1;
+
+ gfc_ref *lastref = NULL;
+
+ if (n->expr)
+   for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next)
+ if (ref->type == REF_COMPONENT || ref->type == REF_ARRAY)
+   lastref = ref;
+
+ bool allocatable = false, pointer = false;
+
+ if (lastref && lastref->type == REF_COMPONENT)
+   {
+ gfc_component *c = lastref->u.c.component;
+
+ if (c->ts.type == BT_CLASS)
+   {
+ pointer = CLASS_DATA (c)->attr.class_pointer;
+ allocatable = CLASS_DATA (c)->attr.allocatable;
+   }
+ else
+   {
+ pointer = c->attr.pointer;
+ allocatable = c->attr.allocatable;
+   }
+   }
+
  if (n->expr == NULL
  || (n->expr->ref->type == REF_ARRAY
  && n->expr->ref->u.ar.type == AR_FULL))
@@ -2887,74 +2913,79 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
   

[PATCH 7/7] [og10] openacc: Strided array sections and components of derived-type arrays

2021-02-24 Thread Julian Brown
This patch disallows selecting components of array sections in update
directives for OpenACC, as specified in OpenACC 3.0, "2.14.4. Update
Directive":

  In Fortran, members of variables of derived type may appear, including
  a subarray of a member. Members of subarrays of derived type may
  not appear.

The diagnostic for attempting to use the same construct on other
directives has also been improved.

gcc/fortran/
* openmp.c (resolve_omp_clauses): Disallow selecting components
of arrays of derived type.

gcc/testsuite/
* gfortran.dg/goacc/array-with-dt-2.f90: Remove expected errors.
* gfortran.dg/goacc/array-with-dt-6.f90: New test.
* gfortran.dg/goacc/mapping-tests-2.f90: Update expected error.
* gfortran.dg/goacc/ref_inquiry.f90: Update expected errors.
* gfortran.dg/gomp/ref_inquiry.f90: Likewise.

libgomp/
* testsuite/libgomp.oacc-fortran/array-stride-dt-1.f90: Remove
expected errors.

(cherry picked from commit 366cf1127a547ff77024a551abb01bb1a6e963cd)
---
 gcc/fortran/ChangeLog.omp |  7 ++
 gcc/fortran/openmp.c  | 64 +++
 gcc/testsuite/ChangeLog.omp   | 10 +++
 .../gfortran.dg/goacc/array-with-dt-2.f90 |  5 +-
 .../gfortran.dg/goacc/array-with-dt-6.f90 | 10 +++
 .../gfortran.dg/goacc/mapping-tests-2.f90 |  4 +-
 .../gfortran.dg/goacc/ref_inquiry.f90 |  8 ---
 .../gfortran.dg/gomp/ref_inquiry.f90  |  4 --
 libgomp/ChangeLog.omp |  7 ++
 .../array-stride-dt-1.f90 |  5 +-
 10 files changed, 78 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-6.f90

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 007855075563..45c68a38914e 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * openmp.c (resolve_omp_clauses): Disallow selecting components
+   of arrays of derived type.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 8d77f9e73510..7085caf772e1 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -4940,17 +4940,31 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses 
*omp_clauses,
 "are allowed on ORDERED directive at %L",
 &n->where);
  }
-   gfc_ref *array_ref = NULL;
+   gfc_ref *lastref = NULL, *lastslice = NULL;
bool resolved = false;
if (n->expr)
  {
-   array_ref = n->expr->ref;
+   lastref = n->expr->ref;
resolved = gfc_resolve_expr (n->expr);
 
/* Look through component refs to find last array
   reference.  */
if (resolved)
  {
+   for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next)
+ if (ref->type == REF_COMPONENT
+ || ref->type == REF_SUBSTRING
+ || ref->type == REF_INQUIRY)
+   lastref = ref;
+ else if (ref->type == REF_ARRAY)
+   {
+ for (int i = 0; i < ref->u.ar.dimen; i++)
+   if (ref->u.ar.dimen_type[i] == DIMEN_RANGE)
+ lastslice = ref;
+
+ lastref = ref;
+   }
+
/* The "!$acc cache" directive allows rectangular
   subarrays to be specified, with some restrictions
   on the form of bounds (not implemented).
@@ -4958,53 +4972,51 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses 
*omp_clauses,
   array isn't contiguous.  An expression such as
   arr(-n:n,-n:n) could be contiguous even if it looks
   like it may not be.  */
-   if (list != OMP_LIST_CACHE
+   if (code->op != EXEC_OACC_UPDATE
+   && list != OMP_LIST_CACHE
&& list != OMP_LIST_DEPEND
&& !gfc_is_simply_contiguous (n->expr, false, true)
-   && gfc_is_not_contiguous (n->expr))
+   && gfc_is_not_contiguous (n->expr)
+   && !(lastslice
+&& (lastslice->next
+|| lastslice->type != REF_ARRAY)))
  gfc_error ("Array is not contiguous at %L",
 &n->where);
-
-   while (array_ref
-  

[PATCH 5/7] [og10] Fortran: %re/%im fixes for OpenMP/OpenACC + gfc_is_simplify_contiguous

2021-02-24 Thread Julian Brown
From: Tobias Burnus 

gcc/fortran/ChangeLog:

* expr.c (gfc_is_simplify_contiguous): Handle REF_INQUIRY, i.e.
%im and %re which are EXPR_VARIABLE.
* openmp.c (resolve_omp_clauses): Diagnose %re/%im explicitly.

gcc/testsuite/ChangeLog:

* gfortran.dg/goacc/ref_inquiry.f90: New test.
* gfortran.dg/gomp/ref_inquiry.f90: New test.

(cherry picked from commit 799478b8914c438f7a33eb319efbae69c81f2111)
---
 gcc/fortran/ChangeLog.omp |  8 +++
 gcc/fortran/expr.c|  2 +
 gcc/fortran/openmp.c  |  8 +++
 gcc/testsuite/ChangeLog.omp   |  7 +++
 .../gfortran.dg/goacc/ref_inquiry.f90 | 56 +++
 .../gfortran.dg/gomp/ref_inquiry.f90  | 39 +
 6 files changed, 120 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/ref_inquiry.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/ref_inquiry.f90

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 09c2bb855c88..f99a11316f52 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,11 @@
+2021-02-24  Tobias Burnus  
+
+   Backport from mainline
+
+   * expr.c (gfc_is_simplify_contiguous): Handle REF_INQUIRY, i.e.
+   %im and %re which are EXPR_VARIABLE.
+   * openmp.c (resolve_omp_clauses): Diagnose %re/%im explicitly.
+
 2021-02-24  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index 569f4d9bf066..6cda947cd568 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -5837,6 +5837,8 @@ gfc_is_simply_contiguous (gfc_expr *expr, bool strict, 
bool permit_element)
part_ref  = ref;
   else if (ref->type == REF_SUBSTRING)
return false;
+  else if (ref->type == REF_INQUIRY)
+   return false;
   else if (ref->u.ar.type != AR_ELEMENT)
ar = &ref->u.ar;
 }
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index a7592f0545d9..8d77f9e73510 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -4984,6 +4984,14 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses 
*omp_clauses,
&& array_ref->next->type == REF_SUBSTRING)))
  gfc_error ("Unexpected substring reference in %s clause "
 "at %L", name, &n->where);
+   else if (array_ref && array_ref->type == REF_INQUIRY)
+ {
+   gcc_assert (array_ref->u.i == INQUIRY_RE
+   || array_ref->u.i == INQUIRY_IM);
+   gfc_error ("Unexpected complex-parts designator "
+  "reference in %s clause at %L",
+  name, &n->where);
+ }
else if (!resolved
|| n->expr->expr_type != EXPR_VARIABLE
|| array_ref->next
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index d012e9e75b4e..257981890982 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Tobias Burnus  
+
+   Backport from mainline
+
+   * gfortran.dg/goacc/ref_inquiry.f90: New test.
+   * gfortran.dg/gomp/ref_inquiry.f90: New test.
+
 2021-02-24  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/testsuite/gfortran.dg/goacc/ref_inquiry.f90 
b/gcc/testsuite/gfortran.dg/goacc/ref_inquiry.f90
new file mode 100644
index ..69dd38e51974
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/ref_inquiry.f90
@@ -0,0 +1,56 @@
+! Check for %re, ...%im, ...%kind, ...%len
+! Cf. also OpenMP's ../gomp/ref_inquiry.f90
+! Cf. OpenACC spec issue 346
+! 
+implicit none
+type t
+  integer :: i
+  character :: c
+  complex :: z
+  complex :: zz(5)
+end type t
+
+integer :: i
+character(kind=4, len=5) :: c
+complex :: z, zz(5)
+type(t) :: x
+
+print *, is_contiguous(zz(:)%re)
+
+! inquiry function; expr_type != EXPR_VARIABLE:
+!$acc enter data copyin(i%kind, c%len) ! { dg-error "not a proper array 
section" }
+!$acc enter data copyin(x%i%kind)  ! { dg-error "not a proper array 
section" }
+!$acc enter data copyin(x%c%len)   ! { dg-error "not a proper array 
section" }
+!$acc update self(i%kind, c%len)   ! { dg-error "not a proper array 
section" }
+!$acc update self(x%i%kind)! { dg-error "not a proper array 
section" }
+!$acc update self(x%c%len) ! { dg-error "not a proper array 
section" }
+
+! EXPR_VARIABLE
+!$acc enter data copyin(z%re)! { dg-error "Unexpected complex-parts 
designator" }
+!$acc enter data copyin(z%im)! { dg-error "Unexpected complex-parts 
designator" }
+!$acc enter data copyin(zz%re)   ! { dg-error "not a proper array section" }
+ ! { dg-error "Array is not contiguous" "" { 
target *-*-* } .-1 }
+!$acc 

[PATCH 4/7] [og10] Fortran: OpenMP/OpenACC diagnose substring rejections better

2021-02-24 Thread Julian Brown
From: Tobias Burnus 

gcc/fortran/ChangeLog:

* openmp.c (resolve_omp_clauses): Explicitly diagnose
substrings as not permitted.

gcc/testsuite/ChangeLog:

* gfortran.dg/goacc/substring.f90: New test.
* gfortran.dg/gomp/substring.f90: New test.

(cherry picked from commit f0e618faeb619ec02dabbef203a5575fca44a7f7)
---
 gcc/fortran/ChangeLog.omp |  7 +
 gcc/fortran/openmp.c  |  8 +-
 gcc/testsuite/ChangeLog.omp   |  7 +
 gcc/testsuite/gfortran.dg/goacc/substring.f90 | 27 +++
 gcc/testsuite/gfortran.dg/gomp/substring.f90  | 22 +++
 5 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/substring.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/substring.f90

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 2dd82a70cb71..09c2bb855c88 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Tobias Burnus  
+
+   Backport from mainline
+
+   * openmp.c (resolve_omp_clauses): Explicitly diagnose
+   substrings as not permitted.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 61a340d7f396..a7592f0545d9 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -4978,7 +4978,13 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses 
*omp_clauses,
|| (n->expr
&& (!resolved || n->expr->expr_type != EXPR_VARIABLE)))
  {
-   if (!resolved
+   if (array_ref
+   && (array_ref->type == REF_SUBSTRING
+   || (array_ref->next
+   && array_ref->next->type == REF_SUBSTRING)))
+ gfc_error ("Unexpected substring reference in %s clause "
+"at %L", name, &n->where);
+   else if (!resolved
|| n->expr->expr_type != EXPR_VARIABLE
|| array_ref->next
|| array_ref->type != REF_ARRAY)
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index e04a4ee3ebff..d012e9e75b4e 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Tobias Burnus  
+
+   Backport from mainline
+
+   * gfortran.dg/goacc/substring.f90: New test.
+   * gfortran.dg/gomp/substring.f90: New test.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/testsuite/gfortran.dg/goacc/substring.f90 
b/gcc/testsuite/gfortran.dg/goacc/substring.f90
new file mode 100644
index ..25031daddf33
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/substring.f90
@@ -0,0 +1,27 @@
+implicit none
+character(len=10) :: str1, str2(5,5)
+
+type t
+  character(len=10) :: str1, str2(5,5)
+end type t
+type(t) :: v
+
+!$acc enter data copyin(v%str1)   ! OK
+!$acc enter data copyin(v%str2)   ! OK
+!$acc enter data copyin(v%str2(1,2))  ! OK
+!$acc enter data copyin(str1) ! OK
+!$acc enter data copyin(str2) ! OK
+!$acc enter data copyin(str2(1,2))! OK
+
+!$acc enter data copyin(v%str1(2:5))   ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc enter data copyin(v%str2(1,2)(2:4))  ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc enter data copyin(str1(2:5)) ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc enter data copyin(str2(1,2)(2:4))! { dg-error "Unexpected substring 
reference in MAP clause" }
+
+!$acc parallel
+!$acc update host(v%str1(2:5)) ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc update host(v%str2(1,2)(2:4))! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc update host(str1(2:5))   ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc update host(str2(1,2)(2:4))  ! { dg-error "Unexpected substring 
reference in MAP clause" }
+!$acc end parallel
+end
diff --git a/gcc/testsuite/gfortran.dg/gomp/substring.f90 
b/gcc/testsuite/gfortran.dg/gomp/substring.f90
new file mode 100644
index ..23d7fb7e48ab
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/substring.f90
@@ -0,0 +1,22 @@
+implicit none
+character(len=10) :: str1, str2(5,5)
+
+type t
+  character(len=10) :: str1, str2(5,5)
+end type t
+type(t) :: v
+
+!$omp target enter data map(to: str1)  ! OK
+!$omp target enter data map(to: str2)  ! OK
+!$omp target enter data map(to: str2(2,5)) ! OK
+
+!$omp target enter data map(to: str1(2,5)) ! { dg-error "Syntax error 
in OpenMP variable list" }
+!$omp target enter data map(to: str2(1,2)(2:4))! { dg-error "Unexpected 
substring reference in MAP clause" }
+
+!$omp target enter data map(to: v%str1)   ! OK

[PATCH 2/7] [og10] openacc: Use class_pointer instead of pointer attribute for class types

2021-02-24 Thread Julian Brown
Elsewhere in the Fortran front-end, the class_pointer attribute is
used for BT_CLASS entities instead of the pointer attribute. This patch
follows suit for OpenACC. I couldn't actually come up with a test case
where this makes a difference (i.e., where "class_pointer" and "pointer"
have different values at this point in the code), but this may nonetheless
fix a latent bug.

gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Use class_pointer attribute
for BT_CLASS.

(cherry picked from commit f743fe231663e32d52db987650d0ec3381a777af)
---
 gcc/fortran/ChangeLog.omp  | 7 +++
 gcc/fortran/trans-openmp.c | 5 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index 9e0cf7198acf..2dd82a70cb71 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * trans-openmp.c (gfc_trans_omp_clauses): Use class_pointer attribute
+   for BT_CLASS.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index e53f7ebb7d7f..d0e299b02142 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -2973,7 +2973,10 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  if (lastcomp->u.c.component->ts.type == BT_DERIVED
  || lastcomp->u.c.component->ts.type == BT_CLASS)
{
- if (sym_attr.pointer || (openacc && sym_attr.allocatable))
+ bool pointer
+   = (lastcomp->u.c.component->ts.type == BT_CLASS
+  ? sym_attr.class_pointer : sym_attr.pointer);
+ if (pointer || (openacc && sym_attr.allocatable))
{
  tree data, size;
 
-- 
2.29.2



[PATCH 3/7] [og10] openacc: Character types and mixed arrays/derived type tests

2021-02-24 Thread Julian Brown
This patch adds some tests for character types that are components
of derived types used in OpenACC data-movement clauses (some of which
currently fail and are thus XFAILed), and tests (also XFAILed) mixing
arrays and derived types.  The XFAILs are addressed by follow-on patches.
Originally a combination of several mainline patches.

(cherry picked from commit b2d84e9f9cccbe4ee662f7002b83105629d09939)
(cherry picked from commit 9a4d32f85ccebc0ee4b24e6d9d7a4f11c04d7146)
(cherry picked from commit b0fb2720d88d680af18981a2097397196b505a1f)
(cherry picked from commit f7fb2f662fe12f327ece8b034ab76b36fdca4696)

gcc/testsuite/
* gfortran.dg/goacc/array-with-dt-2.f90: New test.
* gfortran.dg/goacc/derived-chartypes-1.f90: Likewise.
* gfortran.dg/goacc/derived-chartypes-2.f90: Likewise.
* gfortran.dg/goacc/derived-chartypes-3.f90: Likewise.
* gfortran.dg/goacc/derived-chartypes-4.f90: Likewise.

libgomp/
* testsuite/libgomp.oacc-fortran/array-stride-dt-1.f90: New test.
---
 gcc/testsuite/ChangeLog.omp   |  10 ++
 .../gfortran.dg/goacc/array-with-dt-2.f90 |  11 ++
 .../gfortran.dg/goacc/derived-chartypes-1.f90 | 132 ++
 .../gfortran.dg/goacc/derived-chartypes-2.f90 | 132 ++
 .../gfortran.dg/goacc/derived-chartypes-3.f90 |  38 +
 .../gfortran.dg/goacc/derived-chartypes-4.f90 |  38 +
 libgomp/ChangeLog.omp |   6 +
 .../array-stride-dt-1.f90 |  45 ++
 8 files changed, 412 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-4.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/array-stride-dt-1.f90

diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 8caef33f95e4..e04a4ee3ebff 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,13 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * gfortran.dg/goacc/array-with-dt-2.f90: New test.
+   * gfortran.dg/goacc/derived-chartypes-1.f90: Likewise.
+   * gfortran.dg/goacc/derived-chartypes-2.f90: Likewise.
+   * gfortran.dg/goacc/derived-chartypes-3.f90: Likewise.
+   * gfortran.dg/goacc/derived-chartypes-4.f90: Likewise.
+
 2021-02-24  Julian Brown  
 
Backport from mainline
diff --git a/gcc/testsuite/gfortran.dg/goacc/array-with-dt-2.f90 
b/gcc/testsuite/gfortran.dg/goacc/array-with-dt-2.f90
new file mode 100644
index ..e4a6f319772c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/array-with-dt-2.f90
@@ -0,0 +1,11 @@
+type t
+   integer, allocatable :: A(:,:)
+end type t
+
+type(t), allocatable :: b(:)
+
+! TODO: Remove expected errors when this is supported.
+!$acc update host(b(::2))  ! { dg-error "Stride should not be specified for 
array section in MAP clause" }
+!$acc update host(b(1)%A(::3,::4))  ! { dg-error "Stride should not be 
specified for array section in MAP clause" }
+end
+
diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-chartypes-1.f90 
b/gcc/testsuite/gfortran.dg/goacc/derived-chartypes-1.f90
new file mode 100644
index ..f7aafbfc036f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/derived-chartypes-1.f90
@@ -0,0 +1,132 @@
+! This currently ICEs. Avoid that.
+! { dg-skip-if "PR98979" { *-*-* } }
+
+type :: type1
+  character(len=35) :: a
+end type type1
+
+type :: type2
+  character(len=35), pointer :: b
+end type type2
+
+type :: aux1
+  character(len=22) :: y
+end type aux1
+
+type, extends(aux1) :: aux
+  character(len=33) :: x
+end type aux
+
+type :: type3
+  class(aux), pointer :: c(:)
+end type type3
+
+type :: type4
+  integer, pointer :: d(:)
+end type type4
+
+type :: type5
+  type(aux1) :: e
+end type type5
+
+type :: type6
+  type(aux1), pointer :: f
+end type type6
+
+type :: type7
+  class(aux), pointer :: g
+end type type7
+
+type(type1) :: foo
+type(type2) :: bar
+type(type3) :: qux
+type(type4) :: quux
+type(type5) :: fred
+type(type6) :: jim
+type(type7) :: shiela
+
+type(type1), pointer :: pfoo
+type(type2), pointer :: pbar
+type(type3), pointer :: pqux
+type(type4), pointer :: pquux
+type(type5), pointer :: pfred
+type(type6), pointer :: pjim
+type(type7), pointer :: pshiela
+
+class(type1), pointer :: cfoo
+class(type2), pointer :: cbar
+class(type3), pointer :: cqux
+class(type4), pointer :: cquux
+class(type5), pointer :: cfred
+class(type6), pointer :: cjim
+class(type7), pointer :: cshiela
+
+class(type1), allocatable :: acfoo
+class(type2), allocatable :: acbar
+class(type3), allocatable :: acqux
+class(type4), allocatable :: acquux
+class(type5), allocatable :: acfred
+class(type6), allocatable :: acjim

[PATCH 1/7] [og10] openacc: Dereference BT_CLASS data pointers but not BT_DERIVED pointers

2021-02-24 Thread Julian Brown
The stanza in gfc_trans_omp_clauses that handles derived type members
that are themselves derived type pointers or class pointers now adds
an explicit dereference only for the latter. The former is already
dereferenced transparently in gfc_conv_component_ref.

gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Fix dereferencing for
BT_DERIVED members.

gcc/testsuite/
* gfortran.dg/goacc/derived-classtypes-1.f95: New test.

(cherry picked from commit cff6e8db880b6e262730b1ce9a9cb00c1f5571e2)
---
 gcc/fortran/ChangeLog.omp |   7 +
 gcc/fortran/trans-openmp.c|   7 +-
 gcc/testsuite/ChangeLog.omp   |   6 +
 .../goacc/derived-classtypes-1.f95| 129 ++
 4 files changed, 146 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-classtypes-1.f95

diff --git a/gcc/fortran/ChangeLog.omp b/gcc/fortran/ChangeLog.omp
index a02606190319..9e0cf7198acf 100644
--- a/gcc/fortran/ChangeLog.omp
+++ b/gcc/fortran/ChangeLog.omp
@@ -1,3 +1,10 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * trans-openmp.c (gfc_trans_omp_clauses): Fix dereferencing for
+   BT_DERIVED members.
+
 2021-02-12  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 452e2a69baa2..e53f7ebb7d7f 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -2980,6 +2980,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  if (lastcomp->u.c.component->ts.type == BT_CLASS)
{
  data = gfc_class_data_get (inner);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (data)));
+ data = build_fold_indirect_ref (data);
  size = gfc_class_vtab_size_get (inner);
}
  else  /* BT_DERIVED.  */
@@ -2988,8 +2990,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  size = TYPE_SIZE_UNIT (TREE_TYPE (inner));
}
 
- OMP_CLAUSE_DECL (node)
-   = build_fold_indirect_ref (data);
+ OMP_CLAUSE_DECL (node) = data;
  OMP_CLAUSE_SIZE (node) = size;
  node2 = build_omp_clause (input_location,
OMP_CLAUSE_MAP);
@@ -2997,7 +2998,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
   openacc
   ? GOMP_MAP_ATTACH_DETACH
   : GOMP_MAP_ALWAYS_POINTER);
- OMP_CLAUSE_DECL (node2) = data;
+ OMP_CLAUSE_DECL (node2) = build_fold_addr_expr (data);
  OMP_CLAUSE_SIZE (node2) = size_int (0);
}
  else
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 8f5512990b85..8caef33f95e4 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,9 @@
+2021-02-24  Julian Brown  
+
+   Backport from mainline
+
+   * gfortran.dg/goacc/derived-classtypes-1.f95: New test.
+
 2021-02-12  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-classtypes-1.f95 
b/gcc/testsuite/gfortran.dg/goacc/derived-classtypes-1.f95
new file mode 100644
index ..e6cf09c6d3c1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/derived-classtypes-1.f95
@@ -0,0 +1,129 @@
+type :: type1
+  integer :: a
+end type type1
+
+type :: type2
+  integer, pointer :: b
+end type type2
+
+type :: aux1
+  integer :: y
+end type aux1
+
+type, extends(aux1) :: aux
+  integer :: x
+end type aux
+
+type :: type3
+  class(aux), pointer :: c(:)
+end type type3
+
+type :: type4
+  integer, pointer :: d(:)
+end type type4
+
+type :: type5
+  type(aux) :: e
+end type type5
+
+type :: type6
+  type(aux), pointer :: f
+end type type6
+
+type :: type7
+  class(aux), pointer :: g
+end type type7
+
+type(type1) :: foo
+type(type2) :: bar
+type(type3) :: qux
+type(type4) :: quux
+type(type5) :: fred
+type(type6) :: jim
+type(type7) :: shiela
+
+type(type1), pointer :: pfoo
+type(type2), pointer :: pbar
+type(type3), pointer :: pqux
+type(type4), pointer :: pquux
+type(type5), pointer :: pfred
+type(type6), pointer :: pjim
+type(type7), pointer :: pshiela
+
+class(type1), pointer :: cfoo
+class(type2), pointer :: cbar
+class(type3), pointer :: cqux
+class(type4), pointer :: cquux
+class(type5), pointer :: cfred
+class(type6), pointer :: cjim
+class(type7), pointer :: cshiela
+
+class(type1), allocatable :: acfoo
+class(type2), allocatable 

[PATCH 0/7] [og10] openacc: Arrays/derived types/character type backports

2021-02-24 Thread Julian Brown
This series contains several backports from mainline to the og10 branch
concerning character and complex types and mixed derived types and array
accesses in OpenACC directive clauses.

Tested with offloading to AMD GCN. I will apply shortly.

Julian Brown (5):
  [og10] openacc: Dereference BT_CLASS data pointers but not BT_DERIVED
pointers
  [og10] openacc: Use class_pointer instead of pointer attribute for
class types
  [og10] openacc: Character types and mixed arrays/derived type tests
  [og10] openacc: Fix lowering for derived-type mappings through array
elements
  [og10] openacc: Strided array sections and components of derived-type
arrays

Tobias Burnus (2):
  [og10] Fortran: OpenMP/OpenACC diagnose substring rejections better
  [og10] Fortran: %re/%im fixes for OpenMP/OpenACC +
gfc_is_simplify_contiguous

 gcc/ChangeLog.omp |   7 +
 gcc/fortran/ChangeLog.omp |  43 
 gcc/fortran/expr.c|   2 +
 gcc/fortran/openmp.c  |  66 --
 gcc/fortran/trans-openmp.c| 196 ++
 gcc/gimplify.c|  12 ++
 gcc/testsuite/ChangeLog.omp   |  53 +
 .../gfortran.dg/goacc/array-with-dt-1.f90 |  11 +
 .../gfortran.dg/goacc/array-with-dt-2.f90 |  10 +
 .../gfortran.dg/goacc/array-with-dt-3.f90 |  14 ++
 .../gfortran.dg/goacc/array-with-dt-4.f90 |  18 ++
 .../gfortran.dg/goacc/array-with-dt-5.f90 |  12 ++
 .../gfortran.dg/goacc/array-with-dt-6.f90 |  10 +
 .../gfortran.dg/goacc/derived-chartypes-1.f90 | 129 
 .../gfortran.dg/goacc/derived-chartypes-2.f90 | 129 
 .../gfortran.dg/goacc/derived-chartypes-3.f90 |  38 
 .../gfortran.dg/goacc/derived-chartypes-4.f90 |  38 
 .../goacc/derived-classtypes-1.f95| 129 
 .../gfortran.dg/goacc/mapping-tests-2.f90 |   4 +-
 .../gfortran.dg/goacc/ref_inquiry.f90 |  48 +
 gcc/testsuite/gfortran.dg/goacc/substring.f90 |  27 +++
 .../gfortran.dg/gomp/ref_inquiry.f90  |  35 
 gcc/testsuite/gfortran.dg/gomp/substring.f90  |  22 ++
 libgomp/ChangeLog.omp |  20 ++
 .../array-stride-dt-1.f90 |  44 
 .../derivedtypes-arrays-1.f90 | 109 ++
 .../libgomp.oacc-fortran/update-dt-array.f90  |  53 +
 27 files changed, 1176 insertions(+), 103 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-5.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/array-with-dt-6.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-chartypes-4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/derived-classtypes-1.f95
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/ref_inquiry.f90
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/substring.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/ref_inquiry.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/substring.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/array-stride-dt-1.f90
 create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/derivedtypes-arrays-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/update-dt-array.f90

-- 
2.29.2



[PATCH v3 6/6] RISC-V: Fix matches against subreg with a bytenum of 0 in riscv.md

2021-02-24 Thread Marcus Comstedt
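These patterns all intend to match the least significant subpart of the
register, which a hard-coded subreg byte offset of 0 only denotes on
little-endian targets.  Use the same endian-neutral
"subreg_lowpart_operator" predicate that ARM uses instead.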

gcc/
* config/riscv/predicates.md (subreg_lowpart_operator): New predicate.
* config/riscv/riscv.md (*addsi3_extended2, *subsi3_extended2)
(*negsi2_extended2, *mulsi3_extended2, *<optab>si3_mask)
(*<optab>si3_mask_1, *<optab>di3_mask, *<optab>di3_mask_1)
(*<optab>si3_extend_mask, *<optab>si3_extend_mask_1): Use
new predicate "subreg_lowpart_operator".
---
 gcc/config/riscv/predicates.md |  5 +++
 gcc/config/riscv/riscv.md  | 70 +-
 2 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index ef821add663..23211513554 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -198,6 +198,11 @@
 (define_predicate "signed_order_operator"
   (match_code "eq,ne,lt,le,ge,gt"))
 
+(define_predicate "subreg_lowpart_operator"
+  (ior (match_code "truncate")
+   (and (match_code "subreg")
+(match_test "subreg_lowpart_p (op)"))))
+
 (define_predicate "fp_native_comparison"
   (match_code "eq,lt,le,gt,ge"))
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fcdcc3abaa0..c3687d57047 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -480,9 +480,9 @@
 (define_insn "*addsi3_extended2"
   [(set (match_operand:DI   0 "register_operand" "=r,r")
(sign_extend:DI
- (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" " r,r")
- (match_operand:DI 2 "arith_operand"" r,I"))
-0)))]
+ (match_operator:SI 3 "subreg_lowpart_operator"
+[(plus:DI (match_operand:DI 1 "register_operand" " r,r")
+  (match_operand:DI 2 "arith_operand"" r,I"))])))]
   "TARGET_64BIT"
   "add%i2w\t%0,%1,%2"
   [(set_attr "type" "arith")
@@ -536,9 +536,9 @@
 (define_insn "*subsi3_extended2"
   [(set (match_operand:DI0 "register_operand" "= r")
(sign_extend:DI
- (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" " rJ")
-  (match_operand:DI 2 "register_operand" "  r"))
-0)))]
+ (match_operator:SI 3 "subreg_lowpart_operator"
+   [(minus:DI (match_operand:DI 1 "reg_or_0_operand" " rJ")
+  (match_operand:DI 2 "register_operand" "  r"))])))]
   "TARGET_64BIT"
   "subw\t%0,%z1,%2"
   [(set_attr "type" "arith")
@@ -572,8 +572,8 @@
 (define_insn "*negsi2_extended2"
   [(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
-(subreg:SI (neg:DI (match_operand:DI 1 "register_operand" " r"))
-   0)))]
+(match_operator:SI 2 "subreg_lowpart_operator"
+  [(neg:DI (match_operand:DI 1 "register_operand" " r"))])))]
   "TARGET_64BIT"
   "negw\t%0,%1"
   [(set_attr "type" "arith")
@@ -627,9 +627,9 @@
 (define_insn "*mulsi3_extended2"
   [(set (match_operand:DI   0 "register_operand" "=r")
(sign_extend:DI
- (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" " r")
- (match_operand:DI 2 "register_operand" " r"))
-0)))]
+ (match_operator:SI 3 "subreg_lowpart_operator"
+   [(mult:DI (match_operand:DI 1 "register_operand" " r")
+ (match_operand:DI 2 "register_operand" " r"))])))]
   "TARGET_MUL && TARGET_64BIT"
   "mulw\t%0,%1,%2"
   [(set_attr "type" "imul")
@@ -1591,10 +1591,10 @@
   [(set (match_operand:SI 0 "register_operand" "= r")
(any_shift:SI
(match_operand:SI 1 "register_operand" "  r")
-   (subreg:QI
-(and:SI
- (match_operand:SI 2 "register_operand"  "r")
- (match_operand 3 "const_int_operand")) 0)))]
+   (match_operator 4 "subreg_lowpart_operator"
+[(and:SI
+  (match_operand:SI 2 "register_operand"  "r")
+  (match_operand 3 "const_int_operand"))])))]
   "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (SImode)-1))
== GET_MODE_BITSIZE (SImode)-1"
   "#"
@@ -1610,10 +1610,10 @@
   [(set (match_operand:SI 0 "register_operand" "= r")
(any_shift:SI
(match_operand:SI 1 "register_operand" "  r")
-   (subreg:QI
-(and:DI
- (match_operand:DI 2 "register_operand"  "r")
- (match_operand 3 "const_int_operand")) 0)))]
+   (match_operator 4 "subreg_lowpart_operator"
+[(and:DI
+  (match_operand:DI 2 "register_operand"  "r")
+  (match_operand 3 "const_int_operand"))])))]
   "TARGET_64BIT
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (SImode)-1))
== GET_MODE_BITSIZE (SImode)-1"
@@ -1646,10 +1646,10 @@
   [(set (match_operand:DI 0 

[PATCH v3 5/6] RISC-V: Update shift-shift-5.c testcase for big endian

2021-02-24 Thread Marcus Comstedt
gcc/
* testsuite/gcc.target/riscv/shift-shift-5.c (sub): Change
order of struct fields depending on byteorder.
---
 gcc/testsuite/gcc.target/riscv/shift-shift-5.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c 
b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c
index 5b2ae89a471..0ecab9723c9 100644
--- a/gcc/testsuite/gcc.target/riscv/shift-shift-5.c
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-5.c
@@ -7,7 +7,11 @@ unsigned long
 sub (long l)
 {
   union u {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 struct s { int a : 19; unsigned int b : 13; int x; } s;
+#else
+struct s { int x; unsigned int b : 13; int a : 19; } s;
+#endif
 long l;
   } u;
   u.l = l;
-- 
2.26.2



[PATCH v3 4/6] RISC-V: Fix trampoline generation on big endian

2021-02-24 Thread Marcus Comstedt
gcc/
* config/riscv/riscv.c (riscv_swap_instruction): New function
to byteswap an SImode rtx containing an instruction.
(riscv_trampoline_init): Byteswap the generated instructions
when needed.
---
 gcc/config/riscv/riscv.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index eab14602355..1cd795bd19c 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -1073,6 +1073,15 @@ riscv_force_binary (machine_mode mode, enum rtx_code 
code, rtx x, rtx y)
   return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
 }
 
+static rtx
+riscv_swap_instruction (rtx inst)
+{
+  gcc_assert (GET_MODE (inst) == SImode);
+  if (BYTES_BIG_ENDIAN)
+inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
+  return inst;
+}
+
 /* Copy VALUE to a register and return that register.  If new pseudos
are allowed, copy it into a new register, otherwise use DEST.  */
 
@@ -4953,7 +4962,7 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
 gen_int_mode (lui_hi_chain_code, 
SImode));
 
   mem = adjust_address (m_tramp, SImode, 0);
-  riscv_emit_move (mem, lui_hi_chain);
+  riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
 
   /* Gen lui t0, hi(func).  */
   rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
@@ -4965,7 +4974,7 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
gen_int_mode (lui_hi_func_code, 
SImode));
 
   mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
-  riscv_emit_move (mem, lui_hi_func);
+  riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
 
   /* Gen addi t2, t2, lo(chain).  */
   rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
@@ -4980,7 +4989,7 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
  force_reg (SImode, GEN_INT 
(lo_chain_code)));
 
   mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
-  riscv_emit_move (mem, addi_lo_chain);
+  riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
 
   /* Gen jr t0, lo(func).  */
   rtx lo_func = riscv_force_binary (SImode, AND, target_function,
@@ -4993,7 +5002,7 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
   force_reg (SImode, GEN_INT 
(lo_func_code)));
 
   mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
-  riscv_emit_move (mem, jr_lo_func);
+  riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
 }
   else
 {
@@ -5019,6 +5028,8 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx 
chain_value)
   /* Copy the trampoline code.  */
   for (i = 0; i < ARRAY_SIZE (trampoline); i++)
{
+ if (BYTES_BIG_ENDIAN)
+   trampoline[i] = __builtin_bswap32(trampoline[i]);
  mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
  riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
}
-- 
2.26.2



[PATCH v3 3/6] RISC-V: Update soft-fp config for big-endian

2021-02-24 Thread Marcus Comstedt
libgcc/
* config/riscv/sfp-machine.h (__BYTE_ORDER): Set according
to __BYTE_ORDER__.
---
 libgcc/config/riscv/sfp-machine.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/libgcc/config/riscv/sfp-machine.h 
b/libgcc/config/riscv/sfp-machine.h
index db2697157ce..8adbf4b8b2e 100644
--- a/libgcc/config/riscv/sfp-machine.h
+++ b/libgcc/config/riscv/sfp-machine.h
@@ -128,7 +128,11 @@ do {   
\
 #define __LITTLE_ENDIAN 1234
 #define __BIG_ENDIAN 4321
 
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
 #define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
 
 
 /* Define ALIASNAME as a strong alias for NAME.  */
-- 
2.26.2



[PATCH v3 2/6] RISC-V: Add riscv{32,64}be with big endian as default

2021-02-24 Thread Marcus Comstedt
gcc/
* common/config/riscv/riscv-common.c
(TARGET_DEFAULT_TARGET_FLAGS): Set default endianness.
* config.gcc (riscv32be-*, riscv64be-*): Set
TARGET_BIG_ENDIAN_DEFAULT to 1.
* config/riscv/elf.h (LINK_SPEC): Change -melf* value
depending on default endianness.
* config/riscv/freebsd.h (LINK_SPEC): Likewise.
* config/riscv/linux.h (LINK_SPEC): Likewise.
* config/riscv/riscv.c (TARGET_DEFAULT_TARGET_FLAGS): Set
default endianness.
* config/riscv/riscv.h (DEFAULT_ENDIAN_SPEC): New macro.
---
 gcc/common/config/riscv/riscv-common.c |  5 +
 gcc/config.gcc | 15 +++
 gcc/config/riscv/elf.h |  2 +-
 gcc/config/riscv/freebsd.h |  2 +-
 gcc/config/riscv/linux.h   |  2 +-
 gcc/config/riscv/riscv.c   |  5 +
 gcc/config/riscv/riscv.h   |  6 ++
 7 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index 6bbe25dba89..34b74e52a2d 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -32,6 +32,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "config/riscv/riscv-protos.h"
 #include "config/riscv/riscv-subset.h"
 
+#ifdef  TARGET_BIG_ENDIAN_DEFAULT
+#undef  TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
+#endif
+
 /* Type for implied ISA info.  */
 struct riscv_implied_info_t
 {
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 17fea83b2e4..ae47e430062 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2464,6 +2464,11 @@ riscv*-*-linux*)
tmake_file="${tmake_file} riscv/t-riscv riscv/t-linux"
gnu_ld=yes
gas=yes
+   case $target in
+   riscv32be-*|riscv64be-*)
+   tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+   ;;
+   esac
# Force .init_array support.  The configure script cannot always
# automatically detect that GAS supports it, yet we require it.
gcc_cv_initfini_array=yes
@@ -2487,6 +2492,11 @@ riscv*-*-elf* | riscv*-*-rtems*)
tmake_file="${tmake_file} riscv/t-riscv"
gnu_ld=yes
gas=yes
+   case $target in
+   riscv32be-*|riscv64be-*)
+   tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+   ;;
+   esac
# Force .init_array support.  The configure script cannot always
# automatically detect that GAS supports it, yet we require it.
gcc_cv_initfini_array=yes
@@ -2496,6 +2506,11 @@ riscv*-*-freebsd*)
tmake_file="${tmake_file} riscv/t-riscv"
gnu_ld=yes
gas=yes
+   case $target in
+   riscv32be-*|riscv64be-*)
+   tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+   ;;
+   esac
# Force .init_array support.  The configure script cannot always
# automatically detect that GAS supports it, yet we require it.
gcc_cv_initfini_array=yes
diff --git a/gcc/config/riscv/elf.h b/gcc/config/riscv/elf.h
index 973efdaed7b..7e65e499031 100644
--- a/gcc/config/riscv/elf.h
+++ b/gcc/config/riscv/elf.h
@@ -18,7 +18,7 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #define LINK_SPEC "\
--melf" XLEN_SPEC "lriscv \
+-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \
 %{mno-relax:--no-relax} \
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h
index f3aca9f7673..6018e7bb764 100644
--- a/gcc/config/riscv/freebsd.h
+++ b/gcc/config/riscv/freebsd.h
@@ -40,7 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef LINK_SPEC
 #define LINK_SPEC "\
-  -melf" XLEN_SPEC "lriscv \
+  -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv  \
   %{p:%nconsider using `-pg' instead of `-p' with gprof (1)}   \
   %{v:-V}  \
   %{assert*} %{R*} %{rpath*} %{defsym*}\
diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
index e74f5d3f914..fce5b896e6e 100644
--- a/gcc/config/riscv/linux.h
+++ b/gcc/config/riscv/linux.h
@@ -58,7 +58,7 @@ along with GCC; see the file COPYING3.  If not see
   "%{mabi=ilp32:_ilp32}"
 
 #define LINK_SPEC "\
--melf" XLEN_SPEC "lriscv" LD_EMUL_SUFFIX " \
+-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \
 %{mno-relax:--no-relax} \
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index fffd0814eee..eab14602355 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5524,6 +5524,11 @@ riscv_asan_shadow_offset (void)
 #undef TARGET_ASAN_SHADOW_OFFSET
 #define TARGET_ASAN_SHADOW_OFFSET 

[PATCH v3 1/6] RISC-V: Support -mlittle-endian and -mbig-endian

2021-02-24 Thread Marcus Comstedt
gcc/
* config/riscv/elf.h (LINK_SPEC): Pass linker endianness flag.
* config/riscv/freebsd.h (LINK_SPEC): Likewise.
* config/riscv/linux.h (LINK_SPEC): Likewise.
* config/riscv/riscv.h (ASM_SPEC): Pass -mbig-endian and
-mlittle-endian.
(BYTES_BIG_ENDIAN): Handle big endian.
(WORDS_BIG_ENDIAN): Define to BYTES_BIG_ENDIAN.
* config/riscv/riscv.opt (-mbig-endian, -mlittle-endian): New
options.
* doc/invoke.texi (-mbig-endian, -mlittle-endian): Document.
---
 gcc/config/riscv/elf.h |  2 ++
 gcc/config/riscv/freebsd.h |  2 ++
 gcc/config/riscv/linux.h   |  2 ++
 gcc/config/riscv/riscv.h   |  6 --
 gcc/config/riscv/riscv.opt |  8 
 gcc/doc/invoke.texi| 12 
 6 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/elf.h b/gcc/config/riscv/elf.h
index d136d46e4fa..973efdaed7b 100644
--- a/gcc/config/riscv/elf.h
+++ b/gcc/config/riscv/elf.h
@@ -20,6 +20,8 @@ along with GCC; see the file COPYING3.  If not see
 #define LINK_SPEC "\
 -melf" XLEN_SPEC "lriscv \
 %{mno-relax:--no-relax} \
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
 %{shared}"
 
 /* Link against Newlib libraries, because the ELF backend assumes Newlib.
diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h
index a48bf9bffe4..f3aca9f7673 100644
--- a/gcc/config/riscv/freebsd.h
+++ b/gcc/config/riscv/freebsd.h
@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3.  If not see
   %{p:%nconsider using `-pg' instead of `-p' with gprof (1)}   \
   %{v:-V}  \
   %{assert*} %{R*} %{rpath*} %{defsym*}\
+  %{mbig-endian:-EB}   \
+  %{mlittle-endian:-EL}\
   %{shared:-Bshareable %{h*} %{soname*}}   \
   %{symbolic:-Bsymbolic}   \
   %{static:-Bstatic}   \
diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
index 9238de5bc92..e74f5d3f914 100644
--- a/gcc/config/riscv/linux.h
+++ b/gcc/config/riscv/linux.h
@@ -60,6 +60,8 @@ along with GCC; see the file COPYING3.  If not see
 #define LINK_SPEC "\
 -melf" XLEN_SPEC "lriscv" LD_EMUL_SUFFIX " \
 %{mno-relax:--no-relax} \
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
 %{shared} \
   %{!shared: \
 %{!static: \
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index c6f8bee07ef..0b667d2e8b9 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -91,6 +91,8 @@ extern const char *riscv_default_mtune (int argc, const char 
**argv);
 %{" FPIE_OR_FPIC_SPEC ":-fpic} \
 %{march=*} \
 %{mabi=*} \
+%{mbig-endian} \
+%{mlittle-endian} \
 %(subtarget_asm_spec)" \
 ASM_MISA_SPEC
 
@@ -126,8 +128,8 @@ ASM_MISA_SPEC
 /* Target machine storage layout */
 
 #define BITS_BIG_ENDIAN 0
-#define BYTES_BIG_ENDIAN 0
-#define WORDS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN)
 
 #define MAX_BITS_PER_WORD 64
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 761a09d18c3..e294e223151 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -21,6 +21,14 @@
 HeaderInclude
 config/riscv/riscv-opts.h
 
+mbig-endian
+Target RejectNegative Mask(BIG_ENDIAN)
+Assume target CPU is configured as big endian.
+
+mlittle-endian
+Target RejectNegative InverseMask(BIG_ENDIAN)
+Assume target CPU is configured as little endian.
+
 mbranch-cost=
 Target RejectNegative Joined UInteger Var(riscv_branch_cost)
 -mbranch-cost=NSet the cost of branches to roughly N instructions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e8baa545eee..9279a37a832 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1169,6 +1169,7 @@ See RS/6000 and PowerPC Options.
 -mrelax  -mno-relax @gol
 -mriscv-attribute  -mmo-riscv-attribute @gol
 -malign-data=@var{type} @gol
+-mbig-endian  -mlittle-endian @gol
 +-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol
 +-mstack-protector-guard-offset=@var{offset}}
 
@@ -26721,6 +26722,17 @@ types.  Supported values for @var{type} are 
@samp{xlen} which uses x register
 width as the alignment value, and @samp{natural} which uses natural alignment.
 @samp{xlen} is the default.
 
+@item -mbig-endian
+@opindex mbig-endian
+Generate big-endian code.  This is the default when GCC is configured for a
+@samp{riscv64be-*-*} or @samp{riscv32be-*-*} target.
+
+@item -mlittle-endian
+@opindex mlittle-endian
+Generate little-endian code.  This is the default when GCC is configured for a
+@samp{riscv64-*-*} or @samp{riscv32-*-*} but not a @samp{riscv64be-*-*} or
+@samp{riscv32be-*-*} target.
+
 @item -mstack-protector-guard=@var{guard}
 @itemx -mstack-protector-guard-reg=@var{reg}
 @itemx 

[PATCH v3 0/6] RISC-V big endian support

2021-02-24 Thread Marcus Comstedt
New update of the RISC-V big endian support.

Changes since v2:

* Replaced matches of (subreg ... 0) in riscv.md with calls to
  a predicate "subreg_lowpart_operator", modeled on how
  arm.md and aarch64.md work.

Testsuite result on 64-bit is now

 rv64gc/   lp64/ medlow |   12 / 6 |   39 /10 |  - |

on both big-endian and little-endian.

On 32-bit I'm seeing a lot of

  User store segfault @ 0x7f7f

in the execution tests.  Need to investigate this further.


  // Marcus





Re: [PATCH] c++: Private parent access check for using decls [PR19377]

2021-02-24 Thread Anthony Sharp via Gcc-patches
> "special"


It wouldn't be my code if it didn't have sp3ling mstakes innit!
Actually to be fair I already changed that spelling mistake a few days
ago in my local code ;)

I was actually thinking about this last night as I was falling asleep
(as you do) and I realised that the whole of my using decl lookup is
redundant. I can simply do this (formatting probably messes up here):

/* 1.  If the "using" keyword is used to inherit DECL within the parent,
 this may cause DECL to be private, so we should return the using
 statement as the source of the problem.

 Scan the fields of PARENT_BINFO and see if there are any using decls.  If
 there are, see if they inherit DECL.  If they do, that's where DECL must
 have been declared private.  */

  for (tree parent_field = TYPE_FIELDS (BINFO_TYPE (parent_binfo));
   parent_field;
   parent_field = DECL_CHAIN (parent_field))
{
  /* Not necessary, but also check TREE_PRIVATE for the sake of
  eliminating obviously non-relevant using decls.  */
  if (TREE_CODE (parent_field) == USING_DECL
 && TREE_PRIVATE (parent_field))
{
/* If the using statement inherits DECL, it is the source of the
 access failure, so return it.  */
 if (cp_tree_equal (strip_using_decl (parent_field), decl))
   return parent_field;
}
}

I was wrong to say that the using decl does not store "where it came
from/what it inherits" - that's exactly what strip_using_decl
achieves. I think the problem was that when I did my initial testing
in trying out ways to get the original decl, I didn't strip it, so the
comparison failed, which led me to make the whole redundant lookup,
blah blah blah.

I've run a quick test and it seems to work, even with the overloads.

Will test it some more and if all's good I will probably send a new
patch some time this weekend.

> I was thinking you could walk through the overload set to see if it
> contains DECL.

I did try that ... sort of. I did a name lookup on the using decl and
that returned a baselink (no idea why, since the lookup function says
it returns a tree list [probably me being dumb]), which then gave me a
bunch of overloads. But that didn't seem to help since if multiple
using decls give me the answer I'm looking for (a match for DECL)
because they were overloaded, then there was no way for me to tell
which using decl was actually the correct one. Kind of like if three
cakes are equally as tasty, then how are you supposed to tell which
one is the most delicious?

Anthony


c++: Macro location fixes [PR 98718]

2021-02-24 Thread Nathan Sidwell


This fixes some issues with macro maps.  We were incorrectly calculating 
the number of macro expansions in a location span, and I had a 
workaround that partially covered that up.  Further, while macro 
location spans are monotonic, that is not true of ordinary location 
spans.  Thus we need to insert an indirection array when binary 
searching the latter. (We load ordinary locations before loading 
imports, but macro locations afterwards.  We make sure an import 
location is de-macrofied, if needed.)


PR c++/98718
gcc/cp/
* module.cc (ool): New indirection vector.
(loc_spans::maybe_propagate): Location is not optional.
(loc_spans::open): Likewise.  Assert monotonically advancing.
(module_for_ordinary_loc): Use ool indirection vector.
(module_state::write_prepare_maps): Do not count empty macro
expansions.  Elide empty spans.
(module_state::write_macro_maps): Skip empty expansions.
(ool_cmp): New qsort comparator.
(module_state::write): Create and destroy ool vector.
(name_pending_imports): Fix dump push/pop.
(preprocess_module): Likewise.  Add more dumping.
(preprocessed_module): Likewise.
libcpp/
* include/line-map.h
* line-map.c
gcc/testsuite/
* g++.dg/modules/pr98718_a.C: New.
* g++.dg/modules/pr98718_b.C: New.


--
Nathan Sidwell
diff --git c/gcc/cp/module.cc w/gcc/cp/module.cc
index 766f2ab853d..e576face0d8 100644
--- c/gcc/cp/module.cc
+++ w/gcc/cp/module.cc
@@ -3363,6 +3363,8 @@ public:
 };
 
 static loc_spans spans;
+/* Indirection to allow bsearching imports by ordinary location.  */
+static vec<module_state *> *ool;
 
 //
 /* Data needed by a module during the process of loading.  */
@@ -13758,13 +13760,12 @@ loc_spans::init (const line_maps *lmaps, const line_map_ordinary *map)
interface and we're importing a partition.  */
 
 bool
-loc_spans::maybe_propagate (module_state *import,
-			location_t loc = UNKNOWN_LOCATION)
+loc_spans::maybe_propagate (module_state *import, location_t hwm)
 {
   bool opened = (module_interface_p () && !module_partition_p ()
 		 && import->is_partition ());
   if (opened)
-open (loc);
+open (hwm);
   return opened;
 }
 
@@ -13772,11 +13773,8 @@ loc_spans::maybe_propagate (module_state *import,
first map of the interval.  */
 
 void
-loc_spans::open (location_t hwm = UNKNOWN_LOCATION)
+loc_spans::open (location_t hwm)
 {
-  if (hwm == UNKNOWN_LOCATION)
-hwm = MAP_START_LOCATION (LINEMAPS_LAST_ORDINARY_MAP (line_table));
-
   span interval;
   interval.ordinary.first = interval.ordinary.second = hwm;
   interval.macro.first = interval.macro.second
@@ -13786,6 +13784,13 @@ loc_spans::open (location_t hwm = UNKNOWN_LOCATION)
 && dump ("Opening span %u ordinary:[%u,... macro:...,%u)",
 	 spans->length (), interval.ordinary.first,
 	 interval.macro.second);
+  if (spans->length ())
+{
+  /* No overlapping!  */
+  auto &last = spans->last ();
+  gcc_checking_assert (interval.ordinary.first >= last.ordinary.second);
+  gcc_checking_assert (interval.macro.second <= last.macro.first);
+}
   spans->safe_push (interval);
 }
 
@@ -15547,13 +15552,13 @@ enum loc_kind {
 static const module_state *
 module_for_ordinary_loc (location_t loc)
 {
-  unsigned pos = 1;
-  unsigned len = modules->length () - pos;
+  unsigned pos = 0;
+  unsigned len = ool->length () - pos;
 
   while (len)
 {
   unsigned half = len / 2;
-  module_state *probe = (*modules)[pos + half];
+  module_state *probe = (*ool)[pos + half];
   if (loc < probe->ordinary_locs.first)
 	len = half;
   else if (loc < probe->ordinary_locs.second)
@@ -15565,7 +15570,7 @@ module_for_ordinary_loc (location_t loc)
 	}
 }
 
-  return NULL;
+  return nullptr;
 }
 
 static const module_state *
@@ -15849,31 +15854,49 @@ module_state::write_prepare_maps (module_state_config *)
   for (unsigned ix = loc_spans::SPAN_FIRST; ix != spans.length (); ix++)
 {
   loc_spans::span &span = spans[ix];
-  line_map_ordinary const *omap
-	= linemap_check_ordinary (linemap_lookup (line_table,
-		  span.ordinary.first));
-
-  /* We should exactly match up.  */
-  gcc_checking_assert (MAP_START_LOCATION (omap) == span.ordinary.first);
 
-  line_map_ordinary const *fmap = omap;
-  for (; MAP_START_LOCATION (omap) < span.ordinary.second; omap++)
+  if (span.ordinary.first != span.ordinary.second)
 	{
-	  /* We should never find a module linemap in an interval.  */
-	  gcc_checking_assert (!MAP_MODULE_P (omap));
+	  line_map_ordinary const *omap
+	= linemap_check_ordinary (linemap_lookup (line_table,
+		  span.ordinary.first));
 
-	  if (max_range < omap->m_range_bits)
-	max_range = omap->m_range_bits;
-	}
+	  /* We should exactly match up.  */
+	  gcc_checking_assert (MAP_START_LOCATION (omap) == 

[PATCH] Darwin, testsuite : Prune 'object file not found for object'.

2021-02-24 Thread Iain Sandoe
Hi,

This is not a GCC problem, but a fault in the static linker where,
when a source file is used multiple times, with conditional compilation
the source file is only referenced by the linker for the first object.
Then, when dsymutil tries to find the source file for next object based
off that source there is no record for it.

I’ve had this patch kicking around for some time, in the hope that the
problem would be fixed in the XCode tools, but it’s still present in the
XC12.5b2 (and will never be fixed in older toolkits).

tested on *-darwin* and x86_64-linux-gnu,
OK for master?
Iain
(if we were not in stage 4, I’d have applied it as obvious).

gcc/testsuite/ChangeLog:

* lib/prune.exp: Prune useless output caused by a linker bug.
---
 gcc/testsuite/lib/prune.exp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/lib/prune.exp b/gcc/testsuite/lib/prune.exp
index a349c8ace3e..2809f88b16f 100644
--- a/gcc/testsuite/lib/prune.exp
+++ b/gcc/testsuite/lib/prune.exp
@@ -84,6 +84,9 @@ proc prune_gcc_output { text } {
 # Ignore harmless warnings from Xcode 4.0.
 regsub -all "(^|\n)\[^\n\]*ld: warning: could not create compact unwind 
for\[^\n\]*" $text "" text
 
+# Ignore dsymutil warning (tool bug is actually linker)
+regsub -all "(^|\n)\[^\n\]*could not find object file symbol for 
symbol\[^\n\]*" $text "" text
+
 # If dg-enable-nn-line-numbers was provided, then obscure source-margin
 # line numbers by converting them to "NN" form.
 set text [maybe-handle-nn-line-numbers $text]
-- 
2.24.1




[PATCH] coroutines : Call promise CTOR with parm copies [PR97587].

2021-02-24 Thread Iain Sandoe
Hi,

As the PR notes, we were calling the promise CTOR with the original
function parameters, not the copy (as pointed out, a previous wording of
the section on this was unambiguous).  Fixed thus.

tested on x86_64-darwin, x86_64-linux-gnu,
this is a wrong-code bug,

OK for master / 10.x?
thanks
Iain

gcc/cp/ChangeLog:

PR c++/97587
* coroutines.cc (struct param_info): Track rvalue refs.
(morph_fn_to_coro): Track rvalue refs, and call the promise
CTOR with the frame copy of passed parms.

gcc/testsuite/ChangeLog:

PR c++/97587
* g++.dg/coroutines/coro1-refs-and-ctors.h: Add a CTOR with two
reference parms, to distinguish the rvalue ref. variant.
* g++.dg/coroutines/pr97587.C: New test.
---
 gcc/cp/coroutines.cc  | 25 ++-
 .../g++.dg/coroutines/coro1-refs-and-ctors.h  |  7 ++--
 gcc/testsuite/g++.dg/coroutines/pr97587.C | 32 +++
 3 files changed, 54 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr97587.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 19d2ca3e23e..0b63914ef9b 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1817,6 +1817,7 @@ struct param_info
   tree orig_type;/* The original type of the parm (not as passed).  */
   bool by_ref;   /* Was passed by reference.  */
   bool pt_ref;   /* Was a pointer to object.  */
+  bool rv_ref;   /* Was an rvalue ref.  */
   bool trivial_dtor; /* The frame type has a trivial DTOR.  */
   bool this_ptr; /* Is 'this' */
   bool lambda_cobj;  /* Lambda capture object */
@@ -4121,7 +4122,7 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
  if (actual_type == NULL_TREE)
actual_type = error_mark_node;
  parm.orig_type = actual_type;
- parm.by_ref = parm.pt_ref = false;
+ parm.by_ref = parm.pt_ref = parm.rv_ref =  false;
  if (TREE_CODE (actual_type) == REFERENCE_TYPE)
{
  /* If the user passes by reference, then we will save the
@@ -4129,8 +4130,10 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
 [dcl.fct.def.coroutine] / 13, if the lifetime of the
 referenced item ends and then the coroutine is resumed,
 we have UB; well, the user asked for it.  */
- actual_type = build_pointer_type (TREE_TYPE (actual_type));
- parm.pt_ref = true;
+ if (TYPE_REF_IS_RVALUE (actual_type))
+   parm.rv_ref = true;
+ else
+   parm.pt_ref = true;
}
  else if (TYPE_REF_P (DECL_ARG_TYPE (arg)))
parm.by_ref = true;
@@ -4498,16 +4501,22 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
  tree this_ref = build1 (INDIRECT_REF, ct, arg);
  tree rt = cp_build_reference_type (ct, false);
  this_ref = convert_to_reference (rt, this_ref, CONV_STATIC,
-  LOOKUP_NORMAL , NULL_TREE,
+  LOOKUP_NORMAL, NULL_TREE,
   tf_warning_or_error);
  vec_safe_push (promise_args, this_ref);
}
- else if (parm.by_ref)
-   vec_safe_push (promise_args, fld_idx);
+ else if (parm.rv_ref)
+   vec_safe_push (promise_args, rvalue(fld_idx));
  else
-   vec_safe_push (promise_args, arg);
+   vec_safe_push (promise_args, fld_idx);
 
- if (TYPE_NEEDS_CONSTRUCTING (parm.frame_type))
+ if (parm.rv_ref || parm.pt_ref)
+   /* Initialise the frame reference field directly.  */
+   r = build_modify_expr (fn_start, TREE_OPERAND (fld_idx, 0),
+  parm.frame_type, INIT_EXPR,
+  DECL_SOURCE_LOCATION (arg), arg,
+  DECL_ARG_TYPE (arg));
+ else if (TYPE_NEEDS_CONSTRUCTING (parm.frame_type))
{
  vec *p_in;
  if (CLASS_TYPE_P (parm.frame_type)
diff --git a/gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h 
b/gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h
index 8831a07875e..dd45a0e1f81 100644
--- a/gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h
+++ b/gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h
@@ -3,10 +3,13 @@ struct coro1 {
   struct promise_type {
 
   promise_type () : vv(-1) {  PRINT ("Promise def. CTOR"); }
-  promise_type (int __x) : vv(__x) {  PRINTF ("Created Promise with 
%d\n",__x); }
+  promise_type (int __x) : vv(__x) {  PRINTF ("promise_type1 with %d\n",__x); }
   promise_type (int __x, int& __y, int&& __z)
 : vv(__x), v2(__y), v3(__z)
-{  PRINTF ("Created Promise with %d, %d, %d\n", __x, __y, __z); }
+{  PRINTF ("promise_type2 with %d, %d, %d\n", __x, __y, __z); }
+  promise_type (int __x, int& __y, 

[PATCH] coroutines : Remove throwing_cleanup marks from the ramp [PR95822].

2021-02-24 Thread Iain Sandoe
Hi,

The FE contains a mechanism for cleaning up return expressions if a
function throws during the execution of cleanups prior to the return.

If the original function has a return value with a non-trivial DTOR
and the body contains a var with a DTOR that might throw, the function
decl is marked "throwing_cleanup".

However, we do not [in the coroutine ramp function, which is
synthesised], use any body var types with DTORs that might throw.

The original body [which will then contain the type with the throwing
DTOR] is transformed into the actor function which only contains void
returns, and is also wrapped in a try-catch block.

So (a) the 'throwing_cleanup' is no longer correct for the ramp and
   (b) we do not need to transfer it to the actor which only contains
   void returns.

this is an ICE-on-valid,
tested on x86_64-darwin, x86_64-linux-gnu,

OK for master / 10.x ?
thanks
Iain

gcc/cp/ChangeLog:

PR c++/95822
* coroutines.cc (morph_fn_to_coro): Unconditionally remove any
set throwing_cleanup marker.

gcc/testsuite/ChangeLog:

PR c++/95822
* g++.dg/coroutines/pr95822.C: New test.
---
 gcc/cp/coroutines.cc  | 11 +
 gcc/testsuite/g++.dg/coroutines/pr95822.C | 29 +++
 2 files changed, 40 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr95822.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index abfe8d08192..19d2ca3e23e 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4029,6 +4029,17 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
   TREE_OPERAND (body_start, 0) = push_stmt_list ();
 }
 
+  /* If the original function has a return value with a non-trivial DTOR
+ and the body contains a var with a DTOR that might throw, the decl is
+ marked "throwing_cleanup".
+ We do not [in the ramp, which is synthesised here], use any body var
+ types with DTORs that might throw.
+ The original body is transformed into the actor function which only
+ contains void returns, and is also wrapped in a try-catch block.
+ So (a) the 'throwing_cleanup' is not correct for the ramp and (b) we do
+ not need to transfer it to the actor which only contains void returns.  */
+  cp_function_chain->throwing_cleanup = false;
+
   /* Create the coro frame type, as far as it can be known at this stage.
  1. Types we already know.  */
 
diff --git a/gcc/testsuite/g++.dg/coroutines/pr95822.C 
b/gcc/testsuite/g++.dg/coroutines/pr95822.C
new file mode 100644
index 000..f6284aa417e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr95822.C
@@ -0,0 +1,29 @@
+#include <coroutine>
+
+struct task {
+  struct promise_type {
+auto initial_suspend() noexcept { return std::suspend_always{}; }
+auto final_suspend() noexcept { return std::suspend_always{}; }
+void return_void() {}
+task get_return_object() { return task{}; }
+void unhandled_exception() noexcept {}
+  };
+
+  ~task() noexcept {}
+
+  bool await_ready() const noexcept { return false; }
+  void await_suspend(std::coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct Error {
+   Error() { };
+  ~Error() noexcept(false) {}
+};
+
+task g();
+
+task f() {
+  Error error;
+  co_await g();
+}
-- 
2.24.1



Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 08:52:44PM +0100, Marc Glisse wrote:
> On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:
> 
> > The following patch adds single_use case which restores these testcases
> > but keeps the testcases the patch meant to improve as is.
> 
> Hello,
> 
> I wonder if :s would be sufficient here? I don't have an opinion on which
> one is better for this particular transformation (don't change the patch
> because of my comment), we just seem to be getting more and more uses of
> single_use in match.pd, maybe at some point we need to revisit the meaning
> of :s or introduce a stronger :S.

:s seems to work for these testcases too, I'm never sure about :s
vs. single_use.

Jakub



Re: [PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Marc Glisse

On Wed, 24 Feb 2021, Jakub Jelinek via Gcc-patches wrote:


The following patch adds single_use case which restores these testcases
but keeps the testcases the patch meant to improve as is.


Hello,

I wonder if :s would be sufficient here? I don't have an opinion on which 
one is better for this particular transformation (don't change the patch 
because of my comment), we just seem to be getting more and more uses of 
single_use in match.pd, maybe at some point we need to revisit the meaning 
of :s or introduce a stronger :S.


--
Marc Glisse


Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 06:17:01PM +, Kwok Cheung Yeung wrote:
> > 1) while linux --enable-futex and accel gomp_sem_t is small (int), rtems
> > and especially posix gomp_sem_t is large; so while it might be a good
> > idea to inline gomp_sem_t on config/{linux,accel} into the union, for
> > the rest it might be better to use indirection; if it is only for the
> > undeferred tasks, it could be just using an automatic variable and
> > put into the struct address of that; could be done either always,
> > or define some macro in config/{linux,accel}/sem.h that gomp_sem_t is
> > small and decide on the indirection based on that macro
> 
> I think a pointer to an automatic variable would be simplest.

Agreed.

> Can anything in cpyfn make use of the fact that kind==GOMP_TASK_UNDEFERRED
> while executing it? Anyway, if we want to keep this, then I suppose we could
> just add an extra field deferred_p that does not change for the lifetime of
> the task to indicate that the task is 'really' a deferred task.

Adding a bool is fine, but see below.

> > 3) kind is not constant, for the deferred tasks it can change over the
> > lifetime of the task, as you've said in the comments, it is kind ==
> > GOMP_TASK_UNDEFERRED vs. other values; while the changes of task->kind
> > are done while holding the task lock, omp_fulfill_event reads it before
> > locking that lock, so I think it needs to be done using
> > if (__atomic_load_n (&task->kind, MEMMODEL_RELAXED) == 
> > GOMP_TASK_UNDEFERRED)
> > Pedantically the stores to task->kind also need to be done
> > with __atomic_store_n MEMMODEL_RELAXED.
> 
> If we check task->deferred_p instead (which never changes for a task after
> instantiation), is that still necessary?

Not for kind or the new field.

> > - in gomp_barrier_handle_tasks the reason for if (new_tasks > 1)
> > is that if there is a single dependent task, the current thread
> > just finished handling one task and so can take that single task and so no
> > need to wake up.  While in the omp_fulfill_event case, even if there
> > is just one new task, we need to schedule it to some thread and so
> > is desirable to wake some thread.
> 
> In that case, we could just do 'if (new_tasks > 0)' instead?

Yes.
> 
> > All we know
> > (if team == gomp_thread ()->ts.team) is that at least one thread is doing
> > something else but that one could be busy for quite some time.
> 
> Well, it should still get around to the new task eventually, so there is no
> problem in terms of correctness here. I suppose we could always wake up one
> more thread than strictly necessary, but that might have knock-on effects on
> performance elsewhere?

Yeah, waking something unnecessarily is always going to cause performance
problems.

> I have applied your patch to move the gomp_team_barrier_done, and in
> omp_fulfill_event, I ensure that a single thread is woken up so that
> gomp_barrier_handle_tasks can signal for the barrier to finish.
> 
> I'm having some trouble coming up with a testcase to test this scenario
> though. I tried having a testcase like this to have threads in separate
> teams:

The unshackled thread testcase would probably need a pthread_create
call and restricting the testcase to POSIX threads targets.
The teams in host teams (or target) don't have at least in OpenMP a way
to serialize, e.g. it can always be implemented like we do ATM.

But I guess that testcase can be done incrementally.

> @@ -545,8 +548,15 @@ struct gomp_task
>   entries and the gomp_task in which they reside.  */
>struct priority_node pnode[3];
>  
> -  bool detach;
> -  gomp_sem_t completion_sem;
> +  union {
> +/* Valid only if deferred_p is false.  */
> +gomp_sem_t *completion_sem;
> +/* Valid only if deferred_p is true.  Set to the team that executes the
> +   task if the task is detached and the completion event has yet to be
> +   fulfilled.  */
> +struct gomp_team *detach_team;
> +  };
> +  bool deferred_p;
>  
>struct gomp_task_icv icv;
>void (*fn) (void *);

What I don't like is that this creates too much wasteful padding
in a struct that should be as small as possible.
At least on 64-bit hosts which we care about most, pahole shows with your
patch:
struct gomp_task {
struct gomp_task * parent;   /* 0 8 */
struct priority_queue  children_queue;   /* 832 */
struct gomp_taskgroup *taskgroup;/*40 8 */
struct gomp_dependers_vec * dependers;   /*48 8 */
struct htab *  depend_hash;  /*56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct gomp_taskwait * taskwait; /*64 8 */
size_t depend_count; /*72 8 */
size_t num_dependees;/*80 8 */
int

[pushed] testsuite, coroutines : Make final_suspend calls noexcept.

2021-02-24 Thread Iain Sandoe
Hi,

The wording of [dcl.fct.def.coroutine]/15 states:
The expression co_await promise.final_suspend() shall not be
potentially-throwing.  A fair number of testcases are not correctly
marked. Fixed here.

mechanical patch,

tested on x86_64-darwin, x86-64-linux-gnu,
pushed as obvious
thanks
Iain

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/co-await-void_type.C: Mark promise
final_suspend call as noexcept.
* g++.dg/coroutines/co-return-syntax-08-bad-return.C: Likewise.
* g++.dg/coroutines/co-return-syntax-10-movable.C: Likewise.
* g++.dg/coroutines/co-return-warning-1.C: Likewise.
* g++.dg/coroutines/co-yield-syntax-08-needs-expr.C: Likewise.
* g++.dg/coroutines/coro-bad-gro-00-class-gro-scalar-return.C: Likewise.
* g++.dg/coroutines/coro-bad-gro-01-void-gro-non-class-coro.C: Likewise.
* g++.dg/coroutines/coro-missing-gro.C: Likewise.
* g++.dg/coroutines/coro-missing-promise-yield.C: Likewise.
* g++.dg/coroutines/coro-missing-ret-value.C: Likewise.
* g++.dg/coroutines/coro-missing-ret-void.C: Likewise.
* g++.dg/coroutines/coro-missing-ueh.h: Likewise.
* g++.dg/coroutines/coro1-allocators.h: Likewise.
* g++.dg/coroutines/coro1-refs-and-ctors.h: Likewise.
* g++.dg/coroutines/coro1-ret-int-yield-int.h: Likewise.
* g++.dg/coroutines/pr94682-preview-this.C: Likewise.
* g++.dg/coroutines/pr94752.C: Likewise.
* g++.dg/coroutines/pr94760-mismatched-traits-and-promise-prev.C: 
Likewise.
* g++.dg/coroutines/pr94879-folly-1.C: Likewise.
* g++.dg/coroutines/pr94883-folly-2.C: Likewise.
* g++.dg/coroutines/pr95050.C: Likewise.
* g++.dg/coroutines/pr95345.C: Likewise.
* g++.dg/coroutines/pr95440.C: Likewise.
* g++.dg/coroutines/pr95591.C: Likewise.
* g++.dg/coroutines/pr95711.C: Likewise.
* g++.dg/coroutines/pr95813.C: Likewise.
* g++.dg/coroutines/symmetric-transfer-00-basic.C: Likewise.
* g++.dg/coroutines/torture/co-await-07-tmpl.C: Likewise.
* g++.dg/coroutines/torture/co-await-17-capture-comp-ref.C: Likewise.
* g++.dg/coroutines/torture/co-ret-00-void-return-is-ready.C: Likewise.
* g++.dg/coroutines/torture/co-ret-01-void-return-is-suspend.C: 
Likewise.
* g++.dg/coroutines/torture/co-ret-03-different-GRO-type.C: Likewise.
* g++.dg/coroutines/torture/co-ret-04-GRO-nontriv.C: Likewise.
* g++.dg/coroutines/torture/co-ret-06-template-promise-val-1.C: 
Likewise.
* g++.dg/coroutines/torture/co-ret-08-template-cast-ret.C: Likewise.
* g++.dg/coroutines/torture/co-ret-09-bool-await-susp.C: Likewise.
* g++.dg/coroutines/torture/co-ret-15-default-return_void.C: Likewise.
* g++.dg/coroutines/torture/co-ret-17-void-ret-coro.C: Likewise.
* g++.dg/coroutines/torture/co-yield-00-triv.C: Likewise.
* g++.dg/coroutines/torture/co-yield-03-tmpl.C: Likewise.
* g++.dg/coroutines/torture/co-yield-04-complex-local-state.C: Likewise.
* g++.dg/coroutines/torture/exceptions-test-0.C: Likewise.
* g++.dg/coroutines/torture/exceptions-test-01-n4849-a.C: Likewise.
* g++.dg/coroutines/torture/func-params-04.C: Likewise.
* g++.dg/coroutines/torture/local-var-06-structured-binding.C: Likewise.
* g++.dg/coroutines/torture/mid-suspend-destruction-0.C: Likewise.
---
 gcc/testsuite/g++.dg/coroutines/co-await-void_type.C | 2 +-
 .../g++.dg/coroutines/co-return-syntax-08-bad-return.C   | 2 +-
 .../g++.dg/coroutines/co-return-syntax-10-movable.C  | 2 +-
 gcc/testsuite/g++.dg/coroutines/co-return-warning-1.C| 2 +-
 .../g++.dg/coroutines/co-yield-syntax-08-needs-expr.C| 2 +-
 .../coroutines/coro-bad-gro-00-class-gro-scalar-return.C | 4 ++--
 .../coroutines/coro-bad-gro-01-void-gro-non-class-coro.C | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro-missing-gro.C   | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro-missing-promise-yield.C | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro-missing-ret-value.C | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro-missing-ret-void.C  | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro-missing-ueh.h   | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro1-allocators.h   | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro1-refs-and-ctors.h   | 2 +-
 gcc/testsuite/g++.dg/coroutines/coro1-ret-int-yield-int.h| 2 +-
 gcc/testsuite/g++.dg/coroutines/pr94682-preview-this.C   | 2 +-
 gcc/testsuite/g++.dg/coroutines/pr94752.C| 2 +-
 .../coroutines/pr94760-mismatched-traits-and-promise-prev.C  | 2 +-
 gcc/testsuite/g++.dg/coroutines/pr94879-folly-1.C| 2 +-
 gcc/testsuite/g++.dg/coroutines/pr94883-folly-2.C| 2 +-
 gcc/testsuite/g++.dg/coroutines/pr95050.C| 2 +-
 gcc/testsuite/g++.dg/coroutines/pr95345.C| 2 +-
 

Re: [PATCH v2 0/5] RISC-V big endian support

2021-02-24 Thread Marcus Comstedt


Hi again.

I've found the reason for the shift-and test fails.

riscv.md does a match on

  (subreg:QI (and:SI ...) 0)

Unfortunately, due to the way "subreg" is defined, this needs to be

  (subreg:QI (and:SI ...) 3)

on big endian.  I can fix the failures by duplicating the rule and
making the one with "0" check !BYTES_BIG_ENDIAN and the one with "3"
check BYTES_BIG_ENDIAN.  But that's a bit heavy handed of course.
I'll try to come up with a solution using subreg_lowpart_p instead of
hardcoding "0" or "3".


  // Marcus




[committed] openmp: Diagnose invalid teams nested in target construct [PR99226]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

The OpenMP standard says:
"A teams region can only be strictly nested within the implicit parallel region
or a target region. If a teams construct is nested within a target construct,
that target construct must contain no statements, declarations or directives
outside of the teams construct."
We weren't diagnosing that restriction, because we need to allow e.g.
 #pragma omp target
 {{
   #pragma omp teams
   ;
 }}
and as target doesn't need to have teams nested in it, using some special
parser of the target body didn't feel right.  And after the parsing,
the question is if e.g. already parsing of the clauses doesn't add some
statements before the teams statement (gimplification certainly will).

As we now have a bugreport where we ICE on the invalid code, this just
diagnoses a subset of the invalid programs, in particular those where
next to the teams strictly nested in targets the target region contains
some other OpenMP construct.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-02-24  Jakub Jelinek  

PR fortran/99226
* omp-low.c (struct omp_context): Add teams_nested_p and
nonteams_nested_p members.
(scan_omp_target): Diagnose teams nested inside of target with other
directives strictly nested inside of the same target.
(check_omp_nesting_restrictions): Set ctx->teams_nested_p or
ctx->nonteams_nested_p as needed.

* c-c++-common/gomp/pr99226.c: New test.
* gfortran.dg/gomp/pr99226.f90: New test.

--- gcc/omp-low.c.jj2021-01-19 13:26:07.282118987 +0100
+++ gcc/omp-low.c   2021-02-24 15:06:41.897529041 +0100
@@ -171,6 +171,14 @@ struct omp_context
 
   /* True if there is bind clause on the construct (i.e. a loop construct).  */
   bool loop_p;
+
+  /* Only used for omp target contexts.  True if a teams construct is
+ strictly nested in it.  */
+  bool teams_nested_p;
+
+  /* Only used for omp target contexts.  True if an OpenMP construct other
+ than teams is strictly nested in it.  */
+  bool nonteams_nested_p;
 };
 
 static splay_tree all_contexts;
@@ -2956,6 +2964,14 @@ scan_omp_target (gomp_target *stmt, omp_
   if (offloaded)
fixup_child_record_type (ctx);
 }
+
+  if (ctx->teams_nested_p && ctx->nonteams_nested_p)
+{
+  error_at (gimple_location (stmt),
+   "%<target%> construct with nested %<teams%> construct "
+   "contains directives outside of the %<teams%> construct");
+  gimple_omp_set_body (stmt, gimple_build_bind (NULL, NULL, NULL));
+}
 }
 
 /* Scan an OpenMP teams directive.  */
@@ -3025,6 +3041,14 @@ check_omp_nesting_restrictions (gimple *
 
   if (ctx != NULL)
 {
+  if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION)
+   {
+ if (gimple_code (stmt) == GIMPLE_OMP_TEAMS && !ctx->teams_nested_p)
+   ctx->teams_nested_p = true;
+ else
+   ctx->nonteams_nested_p = true;
+   }
   if (gimple_code (ctx->stmt) == GIMPLE_OMP_SCAN
  && ctx->outer
  && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
--- gcc/testsuite/c-c++-common/gomp/pr99226.c.jj2021-02-24 
15:04:39.213747973 +0100
+++ gcc/testsuite/c-c++-common/gomp/pr99226.c   2021-02-24 15:02:58.068825607 
+0100
@@ -0,0 +1,17 @@
+/* PR fortran/99226 */
+/* { dg-do compile } */
+
+void
+foo (int n)
+{
+  int i;
+  #pragma omp target   /* { dg-error "construct with nested 'teams' construct 
contains directives outside of the 'teams' construct" } */
+  {
+#pragma omp teams distribute dist_schedule (static, n + 4)
+for (i = 0; i < 8; i++)
+  ;
+#pragma omp teams distribute dist_schedule (static, n + 4)
+for (i = 0; i < 8; i++)
+  ;
+  }
+}
--- gcc/testsuite/gfortran.dg/gomp/pr99226.f90.jj   2021-02-24 
15:04:21.994919040 +0100
+++ gcc/testsuite/gfortran.dg/gomp/pr99226.f90  2021-02-24 15:03:55.618185552 
+0100
@@ -0,0 +1,13 @@
+! PR fortran/99226
+
+subroutine sub (n)
+   integer :: n, i
+   !$omp target! { dg-error "construct with nested 'teams' construct 
contains directives outside of the 'teams' construct" }
+   !$omp teams distribute dist_schedule (static,n+4)
+   do i = 1, 8
+   end do
+   !$omp teams distribute dist_schedule (static,n+4)
+   do i = 1, 8
+   end do
+   !$omp end target
+end


Jakub



[committed] libgcc: Avoid signed negation overflow in __powi?f2 [PR99236]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

When these functions are called with integer minimum, there is UB on the libgcc
side.  Fixed in the obvious way, the code in the end wants ABSU_EXPR behavior.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
as obvious.

2021-02-24  Jakub Jelinek  

PR libgcc/99236
* libgcc2.c (__powisf2, __powidf2, __powitf2, __powixf2): Perform
negation of m in unsigned type.

--- libgcc/libgcc2.c.jj 2021-01-04 10:25:53.727065175 +0100
+++ libgcc/libgcc2.c2021-02-24 13:33:38.415470027 +0100
@@ -1834,7 +1834,7 @@ __fixunssfSI (SFtype a)
 TYPE
 NAME (TYPE x, int m)
 {
-  unsigned int n = m < 0 ? -m : m;
+  unsigned int n = m < 0 ? -(unsigned int) m : (unsigned int) m;
   TYPE y = n % 2 ? x : 1;
   while (n >>= 1)
 {


Jakub



[committed] [PR99123] inline-asm: Don't use decompose_mem_address to find used hard regs

2021-02-24 Thread Vladimir Makarov via Gcc-patches

The following patch solves

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99123

The patch was successfully bootstrapped and tested on x86-64


commit b6680c2084521d2612c3a08aa01b274078c4f3e3
Author: Vladimir N. Makarov 
Date:   Wed Feb 24 13:54:10 2021 -0500

[PR99123] inline-asm: Don't use decompose_mem_address to find used hard regs

Inline asm in question has empty constraint which means anything
including memory with invalid address.  To check used hard regs we
used decompose_mem_address which assumes memory with valid address.
The patch implements the same semantics without assuming valid
addresses.

gcc/ChangeLog:

PR inline-asm/99123
* lra-constraints.c (uses_hard_regs_p): Don't use decompose_mem_address.

gcc/testsuite/ChangeLog:

PR inline-asm/99123
* gcc.target/i386/pr99123.c: New.

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 6a5aa41ed55..51acf7f0701 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1904,16 +1904,6 @@ uses_hard_regs_p (rtx x, HARD_REG_SET set)
   return (x_hard_regno >= 0
 	  && overlaps_hard_reg_set_p (set, mode, x_hard_regno));
 }
-  if (MEM_P (x))
-{
-  struct address_info ad;
-
-  decompose_mem_address (&ad, x);
-  if (ad.base_term != NULL && uses_hard_regs_p (*ad.base_term, set))
-	return true;
-  if (ad.index_term != NULL && uses_hard_regs_p (*ad.index_term, set))
-	return true;
-}
   fmt = GET_RTX_FORMAT (code);
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr99123.c b/gcc/testsuite/gcc.target/i386/pr99123.c
new file mode 100644
index 000..4f32547d5b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99123.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+static inline void *
+baz (void *s, unsigned long c, unsigned int count)
+{
+  int d0, d1;
+  __asm__ __volatile__ (""
+: "=" (d0), "=" (d1)
+:"a" (c), "q" (count), "0" (count / 4), "" ((long) s)   /// "1"
+:"memory");
+  return s;
+}
+
+struct A
+{
+  unsigned long *a;
+};
+
+inline static void *
+bar (struct A *x, int y)
+{
+  char *ptr;
+
+  ptr = (void *) x->a[y >> 12];
+  ptr += y % (1UL << 12);
+  return (void *) ptr;
+}
+
+int
+foo (struct A *x, unsigned int *y, int z, int u)
+{
+  int a, b, c, d, e;
+
+  z += *y;
+  c = z + u;
+  a = (z >> 12) + 1;
+  do
+{
+  b = (a << 12);
+  d = b - z;
+  e = c - z;
+  if (e < d)
+d = e;
+  baz (bar (x, z), 0, d);
+  z = b;
+  a++;
+}
+  while (z < c);
+  return 0;
+}


[PATCH] match.pd: Use single_use for (T)(A) + CST -> (T)(A + CST) [PR95798]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

The r10-2806 change regressed following testcases, instead of doing
int -> unsigned long sign-extension once and then add 8, 16, ... 56 to it
for each of the memory access, it adds 8, 16, ... 56 in int mode and then
sign extends each.  So that means:
+   movq$0, (%rsp,%rax,8)
+   leal1(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal2(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal3(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal4(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal5(%rdx), %eax
+   cltq
+   movq$0, (%rsp,%rax,8)
+   leal6(%rdx), %eax
+   addl$7, %edx
+   cltq
+   movslq  %edx, %rdx
+   movq$0, (%rsp,%rax,8)
movq$0, (%rsp,%rdx,8)
-   movq$0, 8(%rsp,%rdx,8)
-   movq$0, 16(%rsp,%rdx,8)
-   movq$0, 24(%rsp,%rdx,8)
-   movq$0, 32(%rsp,%rdx,8)
-   movq$0, 40(%rsp,%rdx,8)
-   movq$0, 48(%rsp,%rdx,8)
-   movq$0, 56(%rsp,%rdx,8)
GCC 9 -> 10 change or:
-   movq$0, (%rsp,%rdx,8)
-   movq$0, 8(%rsp,%rdx,8)
-   movq$0, 16(%rsp,%rdx,8)
-   movq$0, 24(%rsp,%rdx,8)
-   movq$0, 32(%rsp,%rdx,8)
-   movq$0, 40(%rsp,%rdx,8)
-   movq$0, 48(%rsp,%rdx,8)
-   movq$0, 56(%rsp,%rdx,8)
+   movq$0, (%rsp,%rax,8)
+   leal1(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal2(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal3(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal4(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal5(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal6(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
+   leal7(%rdx), %eax
+   movq$0, (%rsp,%rax,8)
change on the other test.  While for the former case of
int there is due to signed integer overflow (unless -fwrapv)
the possibility to undo it e.g. during expansion, for the unsigned
case information is unfortunately lost.

The following patch adds single_use case which restores these testcases
but keeps the testcases the patch meant to improve as is.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  

PR target/95798
* match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check.

* gcc.target/i386/pr95798-1.c: New test.
* gcc.target/i386/pr95798-2.c: New test.

--- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100
+++ gcc/match.pd2021-02-24 15:41:15.64030 +0100
@@ -2492,11 +2492,12 @@ (define_operator_list COND_TERNARY
 /* ((T)(A)) + CST -> (T)(A + CST)  */
 #if GIMPLE
   (simplify
-   (plus (convert SSA_NAME@0) INTEGER_CST@1)
+   (plus (convert@2 SSA_NAME@0) INTEGER_CST@1)
 (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
  && TREE_CODE (type) == INTEGER_TYPE
  && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
- && int_fits_type_p (@1, TREE_TYPE (@0)))
+ && int_fits_type_p (@1, TREE_TYPE (@0))
+&& single_use (@2))
  /* Perform binary operation inside the cast if the constant fits
 and (A + CST)'s range does not overflow.  */
  (with
--- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj2021-02-24 
15:58:06.935598077 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-1.c   2021-02-24 16:02:47.298504500 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target 
lp64 } } } */
+
+void bar (unsigned long long *, int);
+
+void
+foo (int y, unsigned long long z)
+{
+  unsigned long long x[1024];
+  unsigned long long i = y % 127;
+  __builtin_memset (x, -1, sizeof (x));
+  x[i] = 0;
+  x[i + 1] = 1;
+  x[i + 2] = 2;
+  x[i + 3] = 3;
+  x[i + 4] = 4;
+  x[i + 5] = 5;
+  x[i + 6] = 6;
+  x[i + 7] = 7;
+  bar (x, y);
+}
--- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj2021-02-24 
16:01:39.708250302 +0100
+++ gcc/testsuite/gcc.target/i386/pr95798-2.c   2021-02-24 16:03:57.497729907 
+0100
@@ -0,0 +1,29 @@
+/* PR target/95798 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 
} } } */
+/* { dg-final { scan-assembler "2, 

Re: [PATCH 4/4] libstdc++: More efficient last day of month.

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 23/02/21 23:13 +0100, Matthias Kretz wrote:

I like the idea.

On Dienstag, 23. Februar 2021 14:25:10 CET Cassio Neri via Libstdc++ wrote:

((__m ^ (__m >> 3)) & 1) | 30


Note that you can drop the `& 1` part. 30 in binary is 0b11110. ORing with a
value in [0, 0b01101] will only toggle the last bit.


Yeah looks right to me.

I've committed all Cassio's patches unchanged (except for whitespace
and the dates on the tests) but we can make this additional
improvement too.

Thanks, Cassio. Nice first contributions to libstdc++!



[PATCH] vrp: Handle VCE in vrp_simplify_cond_using_ranges [PR80635]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 01:32:02PM +0100, Richard Biener wrote:
> Small comment about the patch below, which otherwise is OK:
> 
> I think that !INTEGRAL_TYPE_P (TREE_TYPE (innerop)) is a sufficient
> condition here.

Unfortunately as shown during the bootstrap, that patch contained a fatal
thinko, 
> > +   default:
> > + break;

for default: we need return;

Here is a version of the patch that passed bootstrap/regtest on x86_64-linux
and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  

PR tree-optimization/80635
* tree-vrp.c (vrp_simplify_cond_using_ranges): Also handle
VIEW_CONVERT_EXPR if modes are the same, innerop is integral and
has mode precision.

* g++.dg/warn/pr80635-1.C: New test.
* g++.dg/warn/pr80635-2.C: New test.

--- gcc/tree-vrp.c.jj   2021-02-24 12:56:58.573939572 +0100
+++ gcc/tree-vrp.c  2021-02-24 13:05:22.675326780 +0100
@@ -4390,11 +4390,22 @@ vrp_simplify_cond_using_ranges (vr_value
   gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
   tree innerop;
 
-  if (!is_gimple_assign (def_stmt)
- || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
+  if (!is_gimple_assign (def_stmt))
return;
 
-  innerop = gimple_assign_rhs1 (def_stmt);
+  switch (gimple_assign_rhs_code (def_stmt))
+   {
+   CASE_CONVERT:
+ innerop = gimple_assign_rhs1 (def_stmt);
+ break;
+   case VIEW_CONVERT_EXPR:
+ innerop = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (innerop)))
+   return;
+ break;
+   default:
+ return;
+   }
 
   if (TREE_CODE (innerop) == SSA_NAME
  && !POINTER_TYPE_P (TREE_TYPE (innerop))
--- gcc/testsuite/g++.dg/warn/pr80635-1.C.jj2021-02-24 12:24:15.176834532 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-1.C   2021-02-24 12:24:15.176834532 
+0100
@@ -0,0 +1,46 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+using size_t = decltype (sizeof (1));
+inline void *operator new (size_t, void *p) { return p; }
+template <typename T>
+struct optional
+{
+  optional () : m_dummy (), live (false) {}
+  void emplace () { new (&m_item) T (); live = true; }
+  ~optional () { if (live) m_item.~T (); }
+
+  union
+  {
+struct {} m_dummy;
+T m_item;
+  };
+  bool live;
+};
+
+extern int get ();
+extern void set (int);
+
+struct A
+{
+  A () : m (get ()) {}
+  ~A () { set (m); }   // { dg-bogus "may be used uninitialized in this 
function" }
+
+  int m;
+};
+
+struct B
+{
+  B ();
+  ~B ();
+};
+
+void func ()
+{
+  optional<A> maybe_a;
+  optional<B> maybe_b;
+
+  maybe_a.emplace ();
+  maybe_b.emplace ();
+}
--- gcc/testsuite/g++.dg/warn/pr80635-2.C.jj2021-02-24 12:24:15.176834532 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-2.C   2021-02-24 12:24:15.176834532 
+0100
@@ -0,0 +1,31 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++17 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+#include <optional>
+
+extern int get ();
+extern void set (int);
+
+struct A
+{
+  A () : m (get ()) {}
+  ~A () { set (m); }   // { dg-bogus "may be used uninitialized in this 
function" }
+
+  int m;
+};
+
+struct B
+{
+  B ();
+  ~B ();
+};
+
+void func ()
+{
+  std::optional<A> maybe_a;
+  std::optional<B> maybe_b;
+
+  maybe_a.emplace ();
+  maybe_b.emplace ();
+}


Jakub



Re: [WIP] Re: [PATCH] openmp: Fix intermittent hanging of task-detach-6 libgomp tests [PR98738]

2021-02-24 Thread Kwok Cheung Yeung

On 22/02/2021 1:49 pm, Jakub Jelinek wrote:

I see three issues with the union of completion_sem and detach_team done
that way.

1) while linux --enable-futex and accel gomp_sem_t is small (int), rtems
and especially posix gomp_sem_t is large; so while it might be a good
idea to inline gomp_sem_t on config/{linux,accel} into the union, for
the rest it might be better to use indirection; if it is only for the
undeferred tasks, it could be just using an automatic variable and
put into the struct address of that; could be done either always,
or define some macro in config/{linux,accel}/sem.h that gomp_sem_t is
small and decide on the indirection based on that macro


I think a pointer to an automatic variable would be simplest.


2) kind == GOMP_TASK_UNDEFERRED is true also for the deferred tasks while
running the cpyfn callback; guess this could be dealt with making sure
the detach handling is done only after
   thr->task = task;
   if (cpyfn)
 {
   cpyfn (arg, data);
   task->copy_ctors_done = true;
 }
   else
 memcpy (arg, data, arg_size);
   thr->task = parent;
   task->kind = GOMP_TASK_WAITING;
   task->fn = fn;
   task->fn_data = arg;
   task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
I see you've instead removed the GOMP_TASK_UNDEFERRED but the rationale
for that is that the copy constructors are being run synchronously


Can anything in cpyfn make use of the fact that kind==GOMP_TASK_UNDEFERRED while 
executing it? Anyway, if we want to keep this, then I suppose we could just add 
an extra field deferred_p that does not change for the lifetime of the task to 
indicate that the task is 'really' a deferred task.



3) kind is not constant, for the deferred tasks it can change over the
lifetime of the task, as you've said in the comments, it is kind ==
GOMP_TASK_UNDEFERRED vs. other values; while the changes of task->kind
are done while holding the task lock, omp_fulfill_event reads it before
locking that lock, so I think it needs to be done using
if (__atomic_load_n (&task->kind, MEMMODEL_RELAXED) == GOMP_TASK_UNDEFERRED)
Pedantically the stores to task->kind also need to be done
with __atomic_store_n MEMMODEL_RELAXED.


If we check task->deferred_p instead (which never changes for a task after 
instantiation), is that still necessary?



Now, similarly for 3) on task->kind, task->detach_team is similar case,
again, some other omp_fulfill_event can clear it (under lock, but still read
outside of the lock), so it
probably should be read with
   struct gomp_team *team
 = __atomic_load_n (&task->detach_team, MEMMODEL_RELAXED);
And again, pedantically the detach_team stores should be atomic relaxed
stores too.



Done.


Looking at gomp_task_run_post_remove_parent, doesn't that function
already handle the in_taskwait and in_depend_wait gomp_sem_posts?



And into gomp_task_run_post_remove_taskgroup, doesn't that already
handle the in_taskgroup_wait gomp_sem_post?


The extra code has been removed.


- in gomp_barrier_handle_tasks the reason for if (new_tasks > 1)
is that if there is a single dependent task, the current thread
just finished handling one task and so can take that single task and so no
need to wake up.  While in the omp_fulfill_event case, even if there
is just one new task, we need to schedule it to some thread and so
is desirable to wake some thread.


In that case, we could just do 'if (new_tasks > 0)' instead?

> All we know
> (if team == gomp_thread ()->ts.team) is that at least one thread is doing
> something else but that one could be busy for quite some time.

Well, it should still get around to the new task eventually, so there is no 
problem in terms of correctness here. I suppose we could always wake up one more 
thread than strictly necessary, but that might have knock-on effects on 
performance elsewhere?



And the other case is the omp_fulfill_event call from unshackeled thread,
i.e. team != gomp_thread ()->ts.team.
Here, e.g. what gomp_target_task_completion talks about applies:
   /* I'm afraid this can't be done after releasing team->task_lock,
  as gomp_target_task_completion is run from unrelated thread and
  therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
  the team could be gone already.  */
Even there are 2 different cases.
One is where team->task_running_count > 0, at that point we know
at least one task is running and so the only thing that is unsafe
gomp_team_barrier_wake (&team->barrier, do_wake);
after gomp_mutex_unlock (&team->task_lock); - there is a possibility
that in between the two calls the thread running omp_fulfill_event
gets interrupted or just delayed and the team finishes barrier and
is freed too.  So the gomp_team_barrier_wake needs to be done before
the unlock in that case.


The lock is now freed after the call for unshackeled threads, before otherwise.


And then there is the case where 

Re: [PATCH v2 0/5] RISC-V big endian support

2021-02-24 Thread Marcus Comstedt


Hi Kito,

Kito Cheng  writes:

> I just spent some time on those two testcases; I think those two
> testcases could just be skipped on big-endian.

Well, that sounds like a pretty big cop out.  If the software doesn't
behave like we expect it to, I feel we should at least have some idea
_why_...


>> FAIL: gcc.target/riscv/shift-and-1.c scan-assembler-not andi
>> FAIL: gcc.target/riscv/shift-and-2.c scan-assembler-not andi
>
> However, it seems like rv32be still has some strange failures there;
> would you mind taking a look at that?

Do you mean in those two test cases specifically?  Or rv32be in
general?


  // Marcus




Re: [PATCH] libstdc++: Update baseline symbols for {aarch64,ia64,m68k,riscv64}-linux

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 23/02/21 16:18 +0100, Andreas Schwab wrote:

libstdc++-v3/
* config/abi/post/aarch64-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/ia64-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/m68k-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/riscv64-linux-gnu/baseline_symbols.txt: Update.
---
.../aarch64-linux-gnu/baseline_symbols.txt| 104 +++
.../post/ia64-linux-gnu/baseline_symbols.txt  | 121 ++
.../post/m68k-linux-gnu/baseline_symbols.txt  | 121 ++
.../riscv64-linux-gnu/baseline_symbols.txt| 116 +
4 files changed, 462 insertions(+)





+FUNC:_ZNSt9once_flag11_M_activateEv@@GLIBCXX_3.4.29
+FUNC:_ZNSt9once_flag9_M_finishEb@@GLIBCXX_3.4.29


I need to revert the changes to once_flag, but I haven't decided
whether to leave the new symbols present (and hidden behind an
ABI-changing macro) or remove them for now.

We can always remove these two lines from each baseline symbols file
if needed.

Patch is OK for trunk, thanks!

(The powerpc64 bug is fixed too, so those can be safely
regenerated now).




Committed: cris: support -fstack-usage

2021-02-24 Thread Hans-Peter Nilsson via Gcc-patches
All the bits were there, used with a pre-existing
-mmax-stackframe=SIZE which unfortunately seems to lack
test-cases.

Note that the early-return for -mno-prologue-epilogue (what
some targets call -mnaked) is deliberately not clearing
current_function_static_stack_size, as I consider that
erroneous usage but don't really care to emit a better error
message.

For stack-usage-1.c, like most ILP32 targets, CRIS (at -O0)
needs 4 bytes for the return-address.  The default size of
256 seems ill chosen but not worth fixing.

gcc:
* config/cris/cris.c (cris_expand_prologue): Set
current_function_static_stack_size, if flag_stack_usage_info.

gcc/testsuite:
* gcc.dg/stack-usage-1.c: Adjust for CRIS.
---
 gcc/config/cris/cris.c   | 5 +
 gcc/testsuite/gcc.dg/stack-usage-1.c | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 8a42aa16da13..0774ed8299ad 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -2892,8 +2892,13 @@ cris_expand_prologue (void)
   framesize += size + cfoa_size;
 }
 
+  /* FIXME: -mmax-stackframe=SIZE is obsoleted; use -Wstack-usage=SIZE
+ instead.  Make it an alias?  */
   if (cris_max_stackframe && framesize > cris_max_stackframe)
 warning (0, "stackframe too big: %d bytes", framesize);
+
+  if (flag_stack_usage_info)
+current_function_static_stack_size = framesize;
 }
 
 /* The expander for the epilogue pattern.  */
diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c 
b/gcc/testsuite/gcc.dg/stack-usage-1.c
index be1254a7348d..93cfe7c01639 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-1.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-1.c
@@ -103,6 +103,8 @@
 #define SIZE 252
 #elif defined (__csky__)
 #  define SIZE 252
+#elif defined (__CRIS__)
+#  define SIZE 252
 #else
 #  define SIZE 256
 #endif
-- 
2.11.0



Re: [PATCH,rs6000] [v2] Optimize pcrel access of globals

2021-02-24 Thread Segher Boessenkool
Hi!

On Tue, Feb 23, 2021 at 09:48:28AM +0100, Richard Biener wrote:
> On Tue, Feb 23, 2021 at 4:48 AM acsawdey--- via Gcc-patches
>  wrote:
> >
> > From: Aaron Sawdey 
> >
> > This patch implements a RTL pass that looks for pc-relative loads of the
> > address of an external variable using the PCREL_GOT relocation and a
> > single load or store that uses that external address.
> >
> > Produced by a cast of thousands:
> >  * Michael Meissner
> >  * Peter Bergner
> >  * Bill Schmidt
> >  * Alan Modra
> >  * Segher Boessenkool
> >  * Aaron Sawdey
> >
> > This incorporates the changes requested in Segher's review. A few things I
> > did not change were the insn-at-a-time scan that could be done with DF, and
> > I did not change to using statistics.[ch] for the counters struct. I did try
> > to improve the naming, and rewrote a number of comments to make them 
> > consistent
> > with the code, and generally tried to make things more readable.
> >
> > OK for trunk if bootstrap/regtest passes?
> 
> stage1 please?

Versions of this patch were first submitted in August.  Of 2019.

We have quite a lot of experience with it so far.  There are no known
wrong-code (or ICE) problems with it.

All of this is only when targetting power10 (which isn't shipping
hardware yet), and is behind a flag anyway, so you can easily disable
the optimisation.

So, there is virtually no risk.

On the other hand, we do want to get this into the hands of everyone
who should test it, and trunk works so much better for that than private
builds or patch sets.

If it misses 11.1, we'll have to backport it to 11.2 .


So, I'd really like it to get in now.  Do any of these arguments change
your position on that?


Segher


Re: [PATCH 1/4] libstdc++: More efficient date from days.

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 23/02/21 13:24 +0000, Cassio Neri via Libstdc++ wrote:

This patch reimplements std::chrono::year_month_day::_S_from_days() which
retrieves a date from the number of elapsed days since 1970/01/01.  The new
implementation is based on Proposition 6.3 of Neri and Schneider, "Euclidean
Affine Functions and Applications to Calendar Algorithms" available at
https://arxiv.org/abs/2102.06959.

The aforementioned paper benchmarks the implementation against several
counterparts, including libc++'s (which is identical to the current
implementation).  The results, shown in Figure 4, indicate the new algorithm is
2.2 times faster than the current one.

The patch adds a test which loops through all integers in [-12687428, 11248737],
and for each of them, gets the corresponding date and compares the result
against its expected value.  The latter is calculated using a much simpler and
easy to understand algorithm but which is also much slower.

The interval used in the test covers the full range of values for which a
roundtrip must work [time.cal.ymd.members].  Despite its completeness the test
runs in a matter of seconds.

libstdc++-v3/ChangeLog:

   * include/std/chrono:
   * testsuite/std/time/year_month_day/3.cc: New test.


Thanks! I'm committing this to trunk (it only changes new C++20
material so OK during stage 4 ... and anyway it's both faster and
better tested than the old code).

I've tweaked it slightly to keep some lines below 80 columns, but no
changes except whitespace.





Re: [committed] libstdc++: Define std::to_chars overloads for __ieee128 [PR 98389]

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 24/02/21 17:00 +0000, Jonathan Wakely via Libstdc++ wrote:

   libstdc++: Define std::to_chars overloads for __ieee128 [PR 98389]

   This adds overloads of std::to_chars for powerpc64's __ieee128, so that
   std::to_chars can be used for long double when -mabi=ieeelongdouble is
   in use.

   Eventually we'll want to extend these new overloads to work for
   __float128 on all targets that support that type. For now, we're only
   doing it for powerpc64 when the new long double type is supported in
   parallel to the old long double type.

   Additionally the existing std::to_chars overloads for long double
   are given the right symbol version, resolving PR libstdc++/98389.

   libstdc++-v3/ChangeLog:

   PR libstdc++/98389
   * config/abi/pre/gnu.ver (GLIBCXX_3.4.29): Do not match to_chars
   symbols for long double arguments mangled as 'g'.
   * config/os/gnu-linux/ldbl-extra.ver: Likewise.
   * config/os/gnu-linux/ldbl-ieee128-extra.ver: Likewise.
   * src/c++17/Makefile.am [GLIBCXX_LDBL_ALT128_COMPAT_TRUE]:
   Use -mabi=ibmlongdouble for floating_to_chars.cc.
   * src/c++17/Makefile.in: Regenerate.
   * src/c++17/floating_to_chars.cc (floating_type_traits_binary128):
   New type defining type traits of IEEE binary128 format.
   (floating_type_traits<__float128>): Define specialization.
   (floating_type_traits): Define in terms of
   floating_type_traits_binary128 when appropriate.
   (floating_to_shortest_scientific): Handle __float128.
   (sprintf_ld): New function template for printing a long double
   or __ieee128 value using sprintf.
   (__floating_to_chars_shortest, __floating_to_chars_precision):
   Use sprintf_ld.
   (to_chars): Define overloads for __float128.

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index d0c8066ce83..a2c151d11ac 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2394,11 +2394,11 @@ GLIBCXX_3.4.29 {
_ZNSt9once_flag9_M_finishEb;

# std::to_chars(char*, char*, [float|double|long double])
-_ZSt8to_charsPcS_[defg];
+_ZSt8to_charsPcS_[def];
# std::to_chars(char*, char*, [float|double|long double], chars_format)
-_ZSt8to_charsPcS_[defg]St12chars_format;
+_ZSt8to_charsPcS_[def]St12chars_format;
# std::to_chars(char*, char*, [float|double|long double], chars_format, int)
-_ZSt8to_charsPcS_[defg]St12chars_formati;
+_ZSt8to_charsPcS_[def]St12chars_formati;


N.B. when we add std::to_chars for __float128 (on other targets where
it's not __ieee128) and we restore the _ZSt8to_charsPcS_g* symbols
they'll have a new symbol version (probably GLIBCXX_3.4.30). When we
add those patterns into config/abi/pre/gnu.ver we'll need to do it
conditionally, so that for powerpc64le those symbols still get the
GLIBCXX_LDBL_3.4.29 version from config/os/gnu-linux/ldbl-extra.ver
e.g.

GLIBCXX_3.4.30 {

#ifndef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
_ZSt8to_charsPcS_g*;
#endif

} GLIBCXX_3.4.29;

This isn't *strictly* necessary, because GNU ld (unintentionally?)
allows a symbol to match more than one version and it seems to use the
later version. But we shouldn't rely on that. Solaris ld gives an
error in that case, but that's not a concern here because Solaris ld
isn't used for powerpc64le-linux (where those symbols want to use the
GLIBCXX_LDBL_3.4.29 version). But it would be better to make the
linker script clean and not rely on the GNU ld quirk.

I'm noting that here in case I get hit by a bus before that change is
needed.



} GLIBCXX_3.4.28;

diff --git a/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver 
b/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
index b4f3af0f9d9..8c7c783ba58 100644
--- a/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
+++ b/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
@@ -42,6 +42,7 @@ GLIBCXX_LDBL_3.4.21 {

GLIBCXX_LDBL_3.4.29 {
  _ZSt10from_charsPKcS0_RgSt12chars_format;
+  _ZSt8to_charsPcS_g*;
} GLIBCXX_LDBL_3.4.21;





Re: [PATCH 1/2] libstdc++: Robustify long double std::to_chars testcase [PR98384]

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 24/02/21 12:04 -0500, Patrick Palka via Libstdc++ wrote:

On Wed, 24 Feb 2021, Jonathan Wakely wrote:


On 23/02/21 11:30 -0500, Patrick Palka via Libstdc++ wrote:
> On Mon, 22 Feb 2021, Patrick Palka wrote:
>
> > This makes the hexadecimal section of the long double std::to_chars
> > testcase more robust by avoiding false-negative FAILs due to printf
> > using a different leading hex digit than us, and by additionally
> > verifying the correctness of the hexadecimal form via round-tripping
> > through std::from_chars.
> >
> > Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
> > look OK for trunk?
>
> The commit message could explain the issue better, so here's v2 with a
> more detailed commit message.
>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Robustify long double std::to_chars testcase
> [PR98384]
>
> The long double std::to_chars testcase currently verifies the
> correctness of its output by comparing it to that of printf, so if
> there's a mismatch between to_chars and printf, the test FAILs.  This
> works well for the scientific, fixed and general formatting modes,
> because the corresponding printf conversion specifiers (%e, %f and %g)
> are rigidly specified.
>
> But this doesn't work so well for the hex formatting mode because the
> corresponding printf conversion specifier %a is more flexibly specified.
> For instance, the hexadecimal forms 0x1p+0, 0x2p-1, 0x4p-2 and 0x8p-3
> are all equivalent and valid outputs of the %a specifier for the number
> 1.  The apparent freedom here is the choice of leading hex digit -- the
> standard just requires that the leading hex digit is nonzero for
> normalized numbers.
>
> Currently, our hexadecimal formatting implementation uses 0/1/2 as the
> leading hex digit for floating point types that have an implicit leading
> mantissa bit which in practice means all supported floating point types
> except x86 long double.  The latter type has a 64 bit mantissa with an
> explicit leading mantissa bit, and for this type our implementation uses
> the most significant four bits of the mantissa as leading hex digit.
> This seems to be consistent with most printf implementations, but not
> all, as PR98384 illustrates.
>
> In order to avoid false-positive FAILs due to arbitrary disagreement
> between to_chars and printf about the choice of leading hex digit, this
> patch makes the testcase's verification via printf conditional on the
> leading hex digits first agreeing.  An additional verification step is
> also added: round-tripping the output of to_chars through from_chars
> should yield the original value.
>
> Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
> look OK for trunk?

> @@ -50,6 +51,38 @@ namespace detail
> void
> test01()
> {
> +  // Verifies correctness of the hexadecimal form [BEGIN,END) for VALUE by
> +  // round-tripping it through from_chars (if available).
> +  auto verify_via_from_chars = [] (char *begin, char *end, long double
> value) {
> +#if __cpp_lib_to_chars >= 201611L || _GLIBCXX_HAVE_USELOCALE

This is currently going to fail, because we don't actually define
__cpp_lib_to_chars yet (we should fix that!)

Is checking the feature test macro here useful? We know that
floating-point from_chars was committed before to_chars, so if this
test is running, we should have from_chars (modulo uselocale being
available, so that check is right). Is this to make the test usable
for other C++ std::lib implementations?


This preprocessor check is copied from from_chars/{5,6}.cc, which I


I was going to say "which idiot wrote that then?" and then I realised
that the check is fine and I just misread the || as &&. Doh.



figured should be appropriate to use here as well.  I figured we'd
want to adjust each of these checks after we define __cpp_lib_to_chars
appropriately anyway (e.g. if __cpp_lib_to_chars is conditioned on
uselocale being available, then the three tests should be changed to just
look at __cpp_lib_to_chars, IIUC).


Agreed.

The patch is fine for trunk, sorry for the noise.

Thanks.



Re: [PATCH 1/2] libstdc++: Robustify long double std::to_chars testcase [PR98384]

2021-02-24 Thread Patrick Palka via Gcc-patches
On Wed, 24 Feb 2021, Jonathan Wakely wrote:

> On 23/02/21 11:30 -0500, Patrick Palka via Libstdc++ wrote:
> > On Mon, 22 Feb 2021, Patrick Palka wrote:
> > 
> > > This makes the hexadecimal section of the long double std::to_chars
> > > testcase more robust by avoiding false-negative FAILs due to printf
> > > using a different leading hex digit than us, and by additionally
> > > verifying the correctness of the hexadecimal form via round-tripping
> > > through std::from_chars.
> > > 
> > > Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
> > > look OK for trunk?
> > 
> > The commit message could explain the issue better, so here's v2 with a
> > more detailed commit message.
> > 
> > -- >8 --
> > 
> > Subject: [PATCH] libstdc++: Robustify long double std::to_chars testcase
> > [PR98384]
> > 
> > The long double std::to_chars testcase currently verifies the
> > correctness of its output by comparing it to that of printf, so if
> > there's a mismatch between to_chars and printf, the test FAILs.  This
> > works well for the scientific, fixed and general formatting modes,
> > because the corresponding printf conversion specifiers (%e, %f and %g)
> > are rigidly specified.
> > 
> > But this doesn't work so well for the hex formatting mode because the
> > corresponding printf conversion specifier %a is more flexibly specified.
> > For instance, the hexadecimal forms 0x1p+0, 0x2p-1, 0x4p-2 and 0x8p-3
> > are all equivalent and valid outputs of the %a specifier for the number
> > 1.  The apparent freedom here is the choice of leading hex digit -- the
> > standard just requires that the leading hex digit is nonzero for
> > normalized numbers.
> > 
> > Currently, our hexadecimal formatting implementation uses 0/1/2 as the
> > leading hex digit for floating point types that have an implicit leading
> > mantissa bit which in practice means all supported floating point types
> > except x86 long double.  The latter type has a 64 bit mantissa with an
> > explicit leading mantissa bit, and for this type our implementation uses
> > the most significant four bits of the mantissa as leading hex digit.
> > This seems to be consistent with most printf implementations, but not
> > all, as PR98384 illustrates.
> > 
> > In order to avoid false-positive FAILs due to arbitrary disagreement
> > between to_chars and printf about the choice of leading hex digit, this
> > patch makes the testcase's verification via printf conditional on the
> > leading hex digits first agreeing.  An additional verification step is
> > also added: round-tripping the output of to_chars through from_chars
> > should yield the original value.
> > 
> > Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
> > look OK for trunk?
> 
> > @@ -50,6 +51,38 @@ namespace detail
> > void
> > test01()
> > {
> > +  // Verifies correctness of the hexadecimal form [BEGIN,END) for VALUE by
> > +  // round-tripping it through from_chars (if available).
> > +  auto verify_via_from_chars = [] (char *begin, char *end, long double
> > value) {
> > +#if __cpp_lib_to_chars >= 201611L || _GLIBCXX_HAVE_USELOCALE
> 
> This is currently going to fail, because we don't actually define
> __cpp_lib_to_chars yet (we should fix that!)
> 
> Is checking the feature test macro here useful? We know that
> floating-point from_chars was committed before to_chars, so if this
> test is running, we should have from_chars (modulo uselocale being
> available, so that check is right). Is this to make the test usable
> for other C++ std::lib implementations?

This preprocessor check is copied from from_chars/{5,6}.cc, which I
figured should be appropriate to use here as well.  I figured we'd
want to adjust each of these checks after we define __cpp_lib_to_chars
appropriately anyway (e.g. if __cpp_lib_to_chars is conditioned on
uselocale being available, then the three tests should be changed to just
look at __cpp_lib_to_chars, IIUC).

> 
> > +long double roundtrip;
> > +auto result = from_chars(begin, end, roundtrip, chars_format::hex);
> > +VERIFY( result.ec == errc{} );
> > +VERIFY( result.ptr == end );
> > +VERIFY( roundtrip == value );
> > +#endif
> 
> 



[committed] libstdc++: Define std::to_chars overloads for __ieee128 [PR 98389]

2021-02-24 Thread Jonathan Wakely via Gcc-patches
This adds overloads of std::to_chars for powerpc64's __ieee128, so that
std::to_chars can be used for long double when -mabi=ieeelongdouble is
in use.

Eventually we'll want to extend these new overloads to work for
__float128 on all targets that support that type. For now, we're only
doing it for powerpc64 when the new long double type is supported in
parallel to the old long double type.

Additionally the existing std::to_chars overloads for long double
are given the right symbol version, resolving PR libstdc++/98389.

libstdc++-v3/ChangeLog:

PR libstdc++/98389
* config/abi/pre/gnu.ver (GLIBCXX_3.4.29): Do not match to_chars
symbols for long double arguments mangled as 'g'.
* config/os/gnu-linux/ldbl-extra.ver: Likewise.
* config/os/gnu-linux/ldbl-ieee128-extra.ver: Likewise.
* src/c++17/Makefile.am [GLIBCXX_LDBL_ALT128_COMPAT_TRUE]:
Use -mabi=ibmlongdouble for floating_to_chars.cc.
* src/c++17/Makefile.in: Regenerate.
* src/c++17/floating_to_chars.cc (floating_type_traits_binary128):
New type defining type traits of IEEE binary128 format.
(floating_type_traits<__float128>): Define specialization.
(floating_type_traits): Define in terms of
floating_type_traits_binary128 when appropriate.
(floating_to_shortest_scientific): Handle __float128.
(sprintf_ld): New function template for printing a long double
or __ieee128 value using sprintf.
(__floating_to_chars_shortest, __floating_to_chars_precision):
Use sprintf_ld.
(to_chars): Define overloads for __float128.

Tested powerpc64le-linux (with and without __ieee128 support in glibc)
and powerpc64-linux (big endian). Committed to trunk.

commit f90027d18a94d02ba8f3b7503c5f0835f432a89e
Author: Jonathan Wakely 
Date:   Fri Feb 19 13:36:41 2021

libstdc++: Define std::to_chars overloads for __ieee128 [PR 98389]

This adds overloads of std::to_chars for powerpc64's __ieee128, so that
std::to_chars can be used for long double when -mabi=ieeelongdouble is
in use.

Eventually we'll want to extend these new overloads to work for
__float128 on all targets that support that type. For now, we're only
doing it for powerpc64 when the new long double type is supported in
parallel to the old long double type.

Additionally the existing std::to_chars overloads for long double
are given the right symbol version, resolving PR libstdc++/98389.

libstdc++-v3/ChangeLog:

PR libstdc++/98389
* config/abi/pre/gnu.ver (GLIBCXX_3.4.29): Do not match to_chars
symbols for long double arguments mangled as 'g'.
* config/os/gnu-linux/ldbl-extra.ver: Likewise.
* config/os/gnu-linux/ldbl-ieee128-extra.ver: Likewise.
* src/c++17/Makefile.am [GLIBCXX_LDBL_ALT128_COMPAT_TRUE]:
Use -mabi=ibmlongdouble for floating_to_chars.cc.
* src/c++17/Makefile.in: Regenerate.
* src/c++17/floating_to_chars.cc (floating_type_traits_binary128):
New type defining type traits of IEEE binary128 format.
(floating_type_traits<__float128>): Define specialization.
(floating_type_traits): Define in terms of
floating_type_traits_binary128 when appropriate.
(floating_to_shortest_scientific): Handle __float128.
(sprintf_ld): New function template for printing a long double
or __ieee128 value using sprintf.
(__floating_to_chars_shortest, __floating_to_chars_precision):
Use sprintf_ld.
(to_chars): Define overloads for __float128.

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver
index d0c8066ce83..a2c151d11ac 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2394,11 +2394,11 @@ GLIBCXX_3.4.29 {
 _ZNSt9once_flag9_M_finishEb;
 
 # std::to_chars(char*, char*, [float|double|long double])
-_ZSt8to_charsPcS_[defg];
+_ZSt8to_charsPcS_[def];
 # std::to_chars(char*, char*, [float|double|long double], chars_format)
-_ZSt8to_charsPcS_[defg]St12chars_format;
+_ZSt8to_charsPcS_[def]St12chars_format;
 # std::to_chars(char*, char*, [float|double|long double], chars_format, int)
-_ZSt8to_charsPcS_[defg]St12chars_formati;
+_ZSt8to_charsPcS_[def]St12chars_formati;
 
 } GLIBCXX_3.4.28;
 
diff --git a/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver b/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
index b4f3af0f9d9..8c7c783ba58 100644
--- a/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
+++ b/libstdc++-v3/config/os/gnu-linux/ldbl-extra.ver
@@ -42,6 +42,7 @@ GLIBCXX_LDBL_3.4.21 {
 
 GLIBCXX_LDBL_3.4.29 {
   _ZSt10from_charsPKcS0_RgSt12chars_format;
+  _ZSt8to_charsPcS_g*;
 } GLIBCXX_LDBL_3.4.21;
 
 CXXABI_LDBL_1.3 {
diff --git 

c++: modules & -fpreprocessed [PR 99072]

2021-02-24 Thread Nathan Sidwell
When we read preprocessed source, we deal with a couple of special
location lines at the start of the file.  These provide information
about the original filename of the source and the current directory, so
we can process the source in the same manner.  When updating that code,
I had a somewhat philosophical question: Should the line table contain
evidence of the filename the user provided to the compiler?  I figured
to leave it there, as it did no harm.  But this defect shows an issue.
It's in the line table and our (non optimizing) line table serializer 
emits that filename.  Which means if one re-preprocesses the original 
source to a differently-named intermediate file, the resultant CMI is 
different.  Boo.  That's a difference that doesn't matter, except the 
CRC matching then fails.  We should elide the filename, so that one can 
preprocess to mktemp intermediate filenames for whatever reason.


This patch takes the approach of expunging it from the line table -- so 
the line table will end up with exactly the same form.  That seems a 
better bet than trying to fix up mismatching line tables in CMI emission.


PR c++/99072
libcpp/
* init.c (read_original_filename): Expunge all evidence of the
original filename.
gcc/testsuite/
* g++.dg/modules/pr99072.H: New.

--
Nathan Sidwell
diff --git c/gcc/testsuite/g++.dg/modules/pr99072.H w/gcc/testsuite/g++.dg/modules/pr99072.H
new file mode 100644
index 000..eda0c07f9e2
--- /dev/null
+++ w/gcc/testsuite/g++.dg/modules/pr99072.H
@@ -0,0 +1,10 @@
+# 0 "REALNAME"
+# 0 ""
+# 0 ""
+# 0 ""
+# 1 "/usr/include/stdc-predef.h" 1 3 4
+# 0 "" 2
+# 1 "REALNAME"
+
+// { dg-additional-options {-fmodule-header -fpreprocessed -fdump-lang-module-lineno} }
+// { dg-final { scan-lang-dump { 4 source file names\n Source file\[0\]=REALNAME\n Source file\[1\]=\n Source file\[2\]=\n Source file\[3\]=/usr/include/stdc-predef.h\n} module } }
diff --git c/libcpp/init.c w/libcpp/init.c
index 17b0d251cda..68ed2c761b9 100644
--- c/libcpp/init.c
+++ w/libcpp/init.c
@@ -752,6 +752,23 @@ read_original_filename (cpp_reader *pfile)
   if (_cpp_handle_directive (pfile, token->flags & PREV_WHITE))
 	{
 	  read_original_directory (pfile);
+
+	  auto *penult = &linemap_check_ordinary
+	(LINEMAPS_LAST_MAP (pfile->line_table, false))[-1];
+	  if (penult[1].reason == LC_RENAME_VERBATIM)
+	{
+	  /* Expunge any evidence of the original linemap.  */
+	  pfile->line_table->highest_location
+		= pfile->line_table->highest_line
+		= penult[0].start_location;
+
+	  penult[1].start_location = penult[0].start_location;
+	  penult[1].reason = penult[0].reason;
+	  penult[0] = penult[1];
+	  pfile->line_table->info_ordinary.used--;
+	  pfile->line_table->info_ordinary.cache = 0;
+	}
+
 	  return true;
 	}
 }


Re: [PATCH v2] rs6000: Convert the vector element register to SImode [PR98914]

2021-02-24 Thread Segher Boessenkool
Hi!

On Wed, Feb 24, 2021 at 09:06:24AM +0800, Xionghu Luo wrote:
> vec_insert defines the element argument type to be signed int by ELFv2
> ABI, When expanding a vector with a variable rtx, convert the rtx type
> SImode.

But that is true for the intrinsics, not for all other callers of
rs6000_expand_vector_init.  See
 as well?

So I don't think you do this in the right place.  You can convince me
with good arguments of course :-)


Segher


Re: [PATCH 1/2] libstdc++: Robustify long double std::to_chars testcase [PR98384]

2021-02-24 Thread Jonathan Wakely via Gcc-patches

On 23/02/21 11:30 -0500, Patrick Palka via Libstdc++ wrote:

On Mon, 22 Feb 2021, Patrick Palka wrote:


This makes the hexadecimal section of the long double std::to_chars
testcase more robust by avoiding false-negative FAILs due to printf
using a different leading hex digit than us, and by additionally
verifying the correctness of the hexadecimal form via round-tripping
through std::from_chars.

Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
look OK for trunk?


The commit message could explain the issue better, so here's v2 with a
more detailed commit message.

-- >8 --

Subject: [PATCH] libstdc++: Robustify long double std::to_chars testcase
[PR98384]

The long double std::to_chars testcase currently verifies the
correctness of its output by comparing it to that of printf, so if
there's a mismatch between to_chars and printf, the test FAILs.  This
works well for the scientific, fixed and general formatting modes,
because the corresponding printf conversion specifiers (%e, %f and %g)
are rigidly specified.

But this doesn't work so well for the hex formatting mode because the
corresponding printf conversion specifier %a is more flexibly specified.
For instance, the hexadecimal forms 0x1p+0, 0x2p-1, 0x4p-2 and 0x8p-3
are all equivalent and valid outputs of the %a specifier for the number
1.  The apparent freedom here is the choice of leading hex digit -- the
standard just requires that the leading hex digit is nonzero for
normalized numbers.

Currently, our hexadecimal formatting implementation uses 0/1/2 as the
leading hex digit for floating point types that have an implicit leading
mantissa bit which in practice means all supported floating point types
except x86 long double.  The latter type has a 64 bit mantissa with an
explicit leading mantissa bit, and for this type our implementation uses
the most significant four bits of the mantissa as leading hex digit.
This seems to be consistent with most printf implementations, but not
all, as PR98384 illustrates.

In order to avoid false-positive FAILs due to arbitrary disagreement
between to_chars and printf about the choice of leading hex digit, this
patch makes the testcase's verification via printf conditional on the
leading hex digits first agreeing.  An additional verification step is
also added: round-tripping the output of to_chars through from_chars
should yield the original value.

Tested on x86, x86_64, powerpc64be, powerpc64le and aarch64.  Does this
look OK for trunk?



@@ -50,6 +51,38 @@ namespace detail
void
test01()
{
+  // Verifies correctness of the hexadecimal form [BEGIN,END) for VALUE by
+  // round-tripping it through from_chars (if available).
+  auto verify_via_from_chars = [] (char *begin, char *end, long double value) {
+#if __cpp_lib_to_chars >= 201611L || _GLIBCXX_HAVE_USELOCALE


This is currently going to fail, because we don't actually define
__cpp_lib_to_chars yet (we should fix that!)

Is checking the feature test macro here useful? We know that
floating-point from_chars was committed before to_chars, so if this
test is running, we should have from_chars (modulo uselocale being
available, so that check is right). Is this to make the test usable
for other C++ std::lib implementations?


+long double roundtrip;
+auto result = from_chars(begin, end, roundtrip, chars_format::hex);
+VERIFY( result.ec == errc{} );
+VERIFY( result.ptr == end );
+VERIFY( roundtrip == value );
+#endif




[committed] libstdc++: Fix failing tests due to 'u' identifier in kernel header

2021-02-24 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/17_intro/names.cc: Undefine 'u' on powerpc*-linux*.

Tested powerpc64le-linux. Committed to trunk.

commit d0453cf5c68b6aa0e8c57a7a99d4285f047387b7
Author: Jonathan Wakely 
Date:   Wed Feb 24 16:24:34 2021

libstdc++: Fix failing tests due to 'u' identifier in kernel header

libstdc++-v3/ChangeLog:

* testsuite/17_intro/names.cc: Undefine 'u' on powerpc*-linux*.

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc
index 77d89203b83..4534d790772 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -202,6 +202,11 @@
 #undef r
 #endif
 
+#if defined (__linux__) && defined (__powerpc__)
+// <asm/types.h> defines __vector128::u
+#undef u
+#endif
+
 #if defined (__linux__) && defined (__sparc__)
 #undef y
 #endif


Re: [Patch, fortran] PR99125 - [9/10/11 Regression] ICE: gimplification failed (gimplify.c:15068)

2021-02-24 Thread Tobias Burnus

Hi Paul,

On 24.02.21 15:05, Paul Richard Thomas via Fortran wrote:

This problem was caused by the compiler attempting to use 0 as an lvalue
and to assign 0 to it.


I did recall the problem – and indeed: PR95868.

The trans-array.c patch does effectively the same as mine there, some
other use of 'tmp' but otherwise ...

The trans-expr patch, I didn't have; unfortunately, it does not solve
the other issues of my PR, either. (Thus, if you are interested in
continuing the len=: work ...)

[Once that PR is fixed, trans-openmp.c has to be updated for it as well.]


Understandably, this upset the gimplifier quite a bit
:-) The fix is to use the ss_info string length for deferred length
character components, where the hidden string length component has been
used. The use of a constant as an lvalue is prevented by checking that the
expression string length is a variable.

Regtests on FC33/x86_64 - OK for all three branches?


LGTM.

Thanks for the patch!

Tobias


Fortran: Fix for class defined operators [PR99125].

2021-02-23  Paul Thomas  

gcc/fortran
PR fortran/99125
* trans-array.c (gfc_conv_expr_descriptor): For deferred length
character components use the ss_info string length instead of
gfc_get_expr_charlen. Make sure that the deferred string length
is a variable before assigning to it. Otherwise use the expr.
* trans-expr.c (gfc_conv_string_length): Make sure that the
deferred string length is a variable before assigning to it.

gcc/testsuite/
PR fortran/99125
* gfortran.dg/alloc_deferred_comp_1.f90: New test.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf


RE: arm: Fix CMSE support detection in libgcc (PR target/99157)

2021-02-24 Thread Kyrylo Tkachov via Gcc-patches


> -Original Message-
> From: Gcc-patches  On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 24 February 2021 15:14
> To: gcc Patches 
> Subject: Re: arm: Fix CMSE support detection in libgcc (PR target/99157)
> 
> On Tue, 23 Feb 2021 at 18:29, Christophe Lyon
>  wrote:
> >
> > As discussed in the PR, the Makefile fragment lacks a double '$' to
> > get the return-code from GCC invocation, resulting in CMSE support
> > missing from multilibs.
> >
> > I checked that the simple patch proposed in the PR fixes the problem.
> 
> BTW, if OK for trunk, I will also push it to gcc-10.
> 

Ok.
Thanks,
Kyrill

> Christophe
> 
> >
> > 2021-02-23  Christophe Lyon  
> > Hau Hsu  
> >
> > PR target/99157
> > libgcc/
> > * config/arm/t-arm: Fix cmse support detection.
> >
> > diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
> > index 364f40e..3625a25 100644
> > --- a/libgcc/config/arm/t-arm
> > +++ b/libgcc/config/arm/t-arm
> > @@ -4,7 +4,7 @@ LIB1ASMFUNCS = _thumb1_case_sqi
> _thumb1_case_uqi
> > _thumb1_case_shi \
> >
> >  HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell $(gcc_compile_bare) -dM -E - </dev/null))
> >  HAVE_V81M:=$(findstring armv8.1-m.main,$(gcc_compile_bare))
> > -ifeq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null >/dev/null 2>/dev/null; echo $?),0)
> > +ifeq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null >/dev/null 2>/dev/null; echo $$?),0)
> >  CMSE_OPTS:=-mcmse
> >  endif


Re: arm: Fix CMSE support detection in libgcc (PR target/99157)

2021-02-24 Thread Christophe Lyon via Gcc-patches
On Tue, 23 Feb 2021 at 18:29, Christophe Lyon
 wrote:
>
> As discussed in the PR, the Makefile fragment lacks a double '$' to
> get the return-code from GCC invocation, resulting in CMSE support
> missing from multilibs.
>
> I checked that the simple patch proposed in the PR fixes the problem.

BTW, if OK for trunk, I will also push it to gcc-10.

Christophe

>
> 2021-02-23  Christophe Lyon  
> Hau Hsu  
>
> PR target/99157
> libgcc/
> * config/arm/t-arm: Fix cmse support detection.
>
> diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
> index 364f40e..3625a25 100644
> --- a/libgcc/config/arm/t-arm
> +++ b/libgcc/config/arm/t-arm
> @@ -4,7 +4,7 @@ LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi
> _thumb1_case_shi \
>
>  HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell $(gcc_compile_bare) -dM -E - </dev/null))
>  HAVE_V81M:=$(findstring armv8.1-m.main,$(gcc_compile_bare))
> -ifeq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null >/dev/null 2>/dev/null; echo $?),0)
> +ifeq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null >/dev/null 2>/dev/null; echo $$?),0)
>  CMSE_OPTS:=-mcmse
>  endif


Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)

2021-02-24 Thread Richard Biener
On Wed, 24 Feb 2021, Tamar Christina wrote:

> Hi Richi,
> 
> This is an updated patch with your suggestion.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/99220
>   * tree-vect-slp.c (optimize_load_redistribution_1): Remove
>   node from cache when it's about to be deleted.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/99220
>   * g++.dg/vect/pr99220.cc: New test.
> 
> The 02/24/2021 08:52, Richard Biener wrote:
> > On Tue, 23 Feb 2021, Tamar Christina wrote:
> > 
> > > Hi Richi,
> > > 
> > > The attached testcase shows a bug where two nodes end up with the same 
> > > pointer.
> > > During the loop that analyzes all the instances
> > > in optimize_load_redistribution_1 we do
> > > 
> > >   if (value)
> > > {
> > >   SLP_TREE_REF_COUNT (value)++;
> > >   SLP_TREE_CHILDREN (root)[i] = value;
> > >   vect_free_slp_tree (node);
> > > }
> > > 
> > > when doing a replacement.  When this is done and the refcount for the node
> > > reaches 0, the node is removed, which allows the libc to return the 
> > > pointer
> > > again in the next call to new, which it does..
> > > 
> > > First instance
> > > 
> > > note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> > > note:   op: VEC_PERM_EXPR
> > > note:   { }
> > > note:   lane permutation { 0[0] 1[1] 0[2] 1[3] }
> > > note:   children 0x5325db0 0x5325200
> > > 
> > > Second instance
> > > 
> > > note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> > > note:   op: VEC_PERM_EXPR
> > > note:   { }
> > > note:   lane permutation { 0[0] 1[1] }
> > > note:   children 0x53255b8 0x5325530
> > > 
> > > This will end up with the illegal construction of
> > > 
> > > note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> > > note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> > > note:   stmt 0 _16 = _14 - _15;
> > > note:   stmt 1 _23 = _17 + _22;
> > > note:   children 0x53257d8 0x5325d28
> > > note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> > > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > > note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:   load permutation { 0 1 }
> > > note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> > > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > > note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:   stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> > > note:   stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> > > note:   load permutation { 0 1 0 1 }
> > > 
> > > To prevent this my initial thought was to add the temporary VEC_PERM_EXPR 
> > > nodes
> > > to the bst_map cache and increase their refcnt one more.  However since 
> > > bst_map
> > > is gated on scalar statements and these nodes have none we can't do that.
> > > 
> > > Instead I realized that load_map is really only a visited list at the top 
> > > level.
> > > So instead of returning the reference, we should return NULL.
> > > 
> > > > What this means is that no replacement was found at that level.  
> > > > This is
> > > > fine since these VEC_PERM_EXPR are single use.  So while any other 
> > > > node is
> > > > an indication to use the cache, VEC_PERM_EXPR are an indication to avoid 
> > > > it.
> > 
> > I don't understand really.  Waiting for the other patch to be pushed so
> > I can eventually have a look, but see below.
> > 
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > > 
> > > Ok for master?
> > > 
> > > Thanks,
> > > Tamar
> > > 
> > > gcc/ChangeLog:
> > > 
> > >   PR tree-optimization/99220
> > >   * tree-vect-slp.c (optimize_load_redistribution_1): Don't use
> > >   VEC_PERM_EXPR in cache.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   PR tree-optimization/99220
> > >   * g++.dg/vect/pr99220.cc: New test.
> > > 
> > > --- inline copy of patch -- 
> > > diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc 
> > > b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > > new file mode 100755
> > > index 
> > > ..ff3058832b742414202a8ada0a9dafc72c9a54aa
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { 
> > > aarch64*-*-* } } } */
> > > +
> > > +class a {
> > > +  float b;
> > > +  float c;
> > > +
> > > +public:
> > > +  a(float d, float e) : b(d), c(e) {}
> > > +  a operator+(a d) { return a(b + d.b, c + d.c); }
> > > +  a operator-(a d) { return a(b - d.b, c - d.c); }
> > > +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> > > +};
> > > +long f;
> > > +a *g;

[PATCH][committed] Testsuite: Disable PR99149 test on big-endian

2021-02-24 Thread Tamar Christina via Gcc-patches
Hi All,

This patch disables the test for PR99149 on Big-endian
where for standard AArch64 the patterns are disabled.

Regtested on aarch64-none-linux-gnu and no issues.

Committed under the obvious rule.

Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR tree-optimization/99149
* g++.dg/vect/pr99149.cc: Disabled on BE.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc b/gcc/testsuite/g++.dg/vect/pr99149.cc
index 9002e3e5268a6c431d0de076d6768c12d79f39f0..00ebe9d9cdf600ada8e66b4b854f0e18ad0b6a7d 100755
--- a/gcc/testsuite/g++.dg/vect/pr99149.cc
+++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-do compile { target { aarch64-*-* } } } */
 /* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" } */
 
 class a {


-- 
diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc b/gcc/testsuite/g++.dg/vect/pr99149.cc
index 9002e3e5268a6c431d0de076d6768c12d79f39f0..00ebe9d9cdf600ada8e66b4b854f0e18ad0b6a7d 100755
--- a/gcc/testsuite/g++.dg/vect/pr99149.cc
+++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-do compile { target { aarch64-*-* } } } */
 /* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" } */
 
 class a {



RE: [PATCH v2] middle-end slp: fix sharing of SLP only patterns.

2021-02-24 Thread Tamar Christina via Gcc-patches
> -----Original Message-----
> From: Christophe Lyon 
> Sent: Wednesday, February 24, 2021 2:17 PM
> To: Richard Biener 
> Cc: Tamar Christina ; nd ; gcc
> Patches 
> Subject: Re: [PATCH v2] middle-end slp: fix sharing of SLP only patterns.
> 
> On Wed, 24 Feb 2021 at 09:38, Richard Biener  wrote:
> >
> > On Tue, 23 Feb 2021, Tamar Christina wrote:
> >
> > > Hi Richi,
> > >
> > > The attached testcase ICEs due to a couple of issues.
> > > In the testcase you have two SLP instances that share the majority
> > > of their definition with each other.  One tree defines a COMPLEX_MUL
> > > sequence and the other tree a COMPLEX_FMA.
> > >
> > > > The ICE happens because:
> > >
> > > 1. the refcounts are wrong, in particular the FMA case doesn't
> > > correctly count the references for the COMPLEX_MUL that it consumes.
> > >
> > > 2. when the FMA is created it incorrectly assumes it can just tear
> > > apart the MUL node that it's consuming.  This is wrong and should
> > > only be done when there is no more uses of the node, in which case
> > > the vector only pattern is no longer relevant.
> > >
> > > To fix the last part the SLP only pattern reset code was moved into
> > > vect_free_slp_tree which results in cleaner code.  I also think it
> > > does belong there since that function knows when there are no more
> > > > uses of the node and so the pattern should be unmarked, so when the
> > > > vectorizer is inspecting the BB it doesn't find the now invalid vector
> only patterns.
> > >
> > > The patch also clears the SLP_TREE_REPRESENTATIVE when stores are
> > > removed such that we don't hit an error later trying to free the
> stmt_vec_info again.
> > >
> > > Lastly it also tweaks the results of whether a pattern has been
> > > detected or not to return true when another SLP instance has created
> > > a pattern that is only used by a different instance (due to the trees 
> > > being
> unshared).
> > >
> > > Instead of ICEing this code now produces
> > >
> > > > adrp    x1, .LANCHOR0
> > > > add x2, x1, :lo12:.LANCHOR0
> > > > movi    v1.2s, 0
> > > mov w0, 0
> > > ldr x4, [x1, #:lo12:.LANCHOR0]
> > > ldrsw   x3, [x2, 16]
> > > ldr x1, [x2, 8]
> > > ldrsw   x2, [x2, 20]
> > > ldr d0, [x4]
> > > ldr d2, [x1, x3, lsl 3]
> > > fcmla   v2.2s, v0.2s, v0.2s, #0
> > > fcmla   v2.2s, v0.2s, v0.2s, #90
> > > str d2, [x1, x3, lsl 3]
> > > fcmla   v1.2s, v0.2s, v0.2s, #0
> > > fcmla   v1.2s, v0.2s, v0.2s, #90
> > > str d1, [x1, x2, lsl 3]
> > > ret
> > >
> > > PS. This testcase actually shows that the codegen we get in these
> > > > cases is not optimal. It should generate a MUL + ADD instead of MUL + FMA.
> > >
> > > But that's for GCC 12.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Hi Tamar,
> 
> The new test fails on aarch64_be:
> FAIL: g++.dg/vect/pr99149.cc  -std=c++14  scan-tree-dump-times slp2
> "stmt.*COMPLEX_FMA" 1
> FAIL: g++.dg/vect/pr99149.cc  -std=c++17  scan-tree-dump-times slp2
> "stmt.*COMPLEX_FMA" 1
> FAIL: g++.dg/vect/pr99149.cc  -std=c++2a  scan-tree-dump-times slp2
> "stmt.*COMPLEX_FMA" 1
> FAIL: g++.dg/vect/pr99149.cc  -std=c++98  scan-tree-dump-times slp2
> "stmt.*COMPLEX_FMA" 1
> 
> Is that supposed to work, or should you disable the test on aarch64_be?

Argh, it's a bit complicated: it's blocked on AArch64 big-endian but not SVE 
big-endian.

I'll disable all BE for now.

Thanks,
Tamar
> 
> Christophe
> 
> >
> > OK.
> >
> > Thanks,
> > Richard.
> >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > >   PR tree-optimization/99149
> > >   * tree-vect-slp-patterns.c (vect_detect_pair_op): Don't recreate the
> > >   buffer.
> > >   (vect_slp_reset_pattern): Remove.
> > >   (complex_fma_pattern::matches): Remove call to
> vect_slp_reset_pattern.
> > >   (complex_mul_pattern::build, complex_fma_pattern::build,
> > >   complex_fms_pattern::build): Fix ref counts.
> > >   * tree-vect-slp.c (vect_free_slp_tree): Undo SLP only pattern
> relevancy
> > >   when node is being deleted.
> > >   (vect_match_slp_patterns_2): Correct result of cache hit on 
> > > patterns.
> > >   (vect_schedule_slp): Invalidate SLP_TREE_REPRESENTATIVE of
> removed
> > >   stores.
> > >   * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize value.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   PR tree-optimization/99149
> > >   * g++.dg/vect/pr99149.cc: New test.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc
> > > b/gcc/testsuite/g++.dg/vect/pr99149.cc
> > > new file mode 100755
> > > index
> > >
> ..902a26f576fcc79d2802bec093
> > > 668674cca1c84f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > 

Re: [PATCH v2] middle-end slp: fix sharing of SLP only patterns.

2021-02-24 Thread Christophe Lyon via Gcc-patches
On Wed, 24 Feb 2021 at 09:38, Richard Biener  wrote:
>
> On Tue, 23 Feb 2021, Tamar Christina wrote:
>
> > Hi Richi,
> >
> > The attached testcase ICEs due to a couple of issues.
> > In the testcase you have two SLP instances that share the majority of their
> > definition with each other.  One tree defines a COMPLEX_MUL sequence and the
> > other tree a COMPLEX_FMA.
> >
> > The ICE happens because:
> >
> > 1. the refcounts are wrong, in particular the FMA case doesn't correctly 
> > count
> > the references for the COMPLEX_MUL that it consumes.
> >
> > 2. when the FMA is created it incorrectly assumes it can just tear apart 
> > the MUL
> > node that it's consuming.  This is wrong and should only be done when there 
> > is
> > no more uses of the node, in which case the vector only pattern is no longer
> > relevant.
> >
> > To fix the last part the SLP only pattern reset code was moved into
> > vect_free_slp_tree which results in cleaner code.  I also think it does 
> > belong
> > there since that function knows when there are no more uses of the node and 
> > so
> > the pattern should be unmarked, so when the vectorizer is inspecting 
> > the BB
> > it doesn't find the now invalid vector only patterns.
> >
> > The patch also clears the SLP_TREE_REPRESENTATIVE when stores are removed 
> > such
> > that we don't hit an error later trying to free the stmt_vec_info again.
> >
> > Lastly it also tweaks the results of whether a pattern has been detected or 
> > not
> > to return true when another SLP instance has created a pattern that is only 
> > used
> > by a different instance (due to the trees being unshared).
> >
> > Instead of ICEing this code now produces
> >
> > adrp    x1, .LANCHOR0
> > add x2, x1, :lo12:.LANCHOR0
> > movi    v1.2s, 0
> > mov w0, 0
> > ldr x4, [x1, #:lo12:.LANCHOR0]
> > ldrsw   x3, [x2, 16]
> > ldr x1, [x2, 8]
> > ldrsw   x2, [x2, 20]
> > ldr d0, [x4]
> > ldr d2, [x1, x3, lsl 3]
> > fcmla   v2.2s, v0.2s, v0.2s, #0
> > fcmla   v2.2s, v0.2s, v0.2s, #90
> > str d2, [x1, x3, lsl 3]
> > fcmla   v1.2s, v0.2s, v0.2s, #0
> > fcmla   v1.2s, v0.2s, v0.2s, #90
> > str d1, [x1, x2, lsl 3]
> > ret
> >
> > PS. This testcase actually shows that the codegen we get in these cases is 
> > not
> > optimal. It should generate a MUL + ADD instead of MUL + FMA.
> >
> > But that's for GCC 12.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Hi Tamar,

The new test fails on aarch64_be:
FAIL: g++.dg/vect/pr99149.cc  -std=c++14  scan-tree-dump-times slp2
"stmt.*COMPLEX_FMA" 1
FAIL: g++.dg/vect/pr99149.cc  -std=c++17  scan-tree-dump-times slp2
"stmt.*COMPLEX_FMA" 1
FAIL: g++.dg/vect/pr99149.cc  -std=c++2a  scan-tree-dump-times slp2
"stmt.*COMPLEX_FMA" 1
FAIL: g++.dg/vect/pr99149.cc  -std=c++98  scan-tree-dump-times slp2
"stmt.*COMPLEX_FMA" 1

Is that supposed to work, or should you disable the test on aarch64_be?

Christophe

>
> OK.
>
> Thanks,
> Richard.
>
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >   PR tree-optimization/99149
> >   * tree-vect-slp-patterns.c (vect_detect_pair_op): Don't recreate the
> >   buffer.
> >   (vect_slp_reset_pattern): Remove.
> >   (complex_fma_pattern::matches): Remove call to vect_slp_reset_pattern.
> >   (complex_mul_pattern::build, complex_fma_pattern::build,
> >   complex_fms_pattern::build): Fix ref counts.
> >   * tree-vect-slp.c (vect_free_slp_tree): Undo SLP only pattern 
> > relevancy
> >   when node is being deleted.
> >   (vect_match_slp_patterns_2): Correct result of cache hit on patterns.
> >   (vect_schedule_slp): Invalidate SLP_TREE_REPRESENTATIVE of removed
> >   stores.
> >   * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize value.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR tree-optimization/99149
> >   * g++.dg/vect/pr99149.cc: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc 
> > b/gcc/testsuite/g++.dg/vect/pr99149.cc
> > new file mode 100755
> > index 
> > ..902a26f576fcc79d2802bec093668674cca1c84f
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" { 
> > target { aarch64*-*-* } } } */
> > +
> > +class a {
> > +  float b;
> > +  float c;
> > +
> > +public:
> > +  a(float d, float e) : b(d), c(e) {}
> > +  a operator+(a d) { return a(b + d.b, c + d.c); }
> > +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> > +};
> > +int f, g;
> > +class {
> > +  a *h;
> > +  a *i;
> > +
> > +public:
> > +  void j() {
> > +a k = h[0], l = i[g], m = k * i[f];
> > +i[g] = l + m;
> > +i[f] = m;
> > +  }
> > +} n;
> > +main() { n.j(); }
> > +
> > 

Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)

2021-02-24 Thread Tamar Christina via Gcc-patches
Hi Richi,

This is an updated patch with your suggestion.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/99220
* tree-vect-slp.c (optimize_load_redistribution_1): Remove
node from cache when it's about to be deleted.

gcc/testsuite/ChangeLog:

PR tree-optimization/99220
* g++.dg/vect/pr99220.cc: New test.

The 02/24/2021 08:52, Richard Biener wrote:
> On Tue, 23 Feb 2021, Tamar Christina wrote:
> 
> > Hi Richi,
> > 
> > The attached testcase shows a bug where two nodes end up with the same 
> > pointer.
> > During the loop that analyzes all the instances
> > in optimize_load_redistribution_1 we do
> > 
> >   if (value)
> > {
> >   SLP_TREE_REF_COUNT (value)++;
> >   SLP_TREE_CHILDREN (root)[i] = value;
> >   vect_free_slp_tree (node);
> > }
> > 
> > when doing a replacement.  When this is done and the refcount for the node
> > reaches 0, the node is removed, which allows the libc to return the pointer
> > again in the next call to new, which it does.
> > 
> > First instance
> > 
> > note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> > note:   op: VEC_PERM_EXPR
> > note:   { }
> > note:   lane permutation { 0[0] 1[1] 0[2] 1[3] }
> > note:   children 0x5325db0 0x5325200
> > 
> > Second instance
> > 
> > note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> > note:   op: VEC_PERM_EXPR
> > note:   { }
> > note:   lane permutation { 0[0] 1[1] }
> > note:   children 0x53255b8 0x5325530
> > 
> > This will end up with the illegal construction of
> > 
> > note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> > note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> > note:   stmt 0 _16 = _14 - _15;
> > note:   stmt 1 _23 = _17 + _22;
> > note:   children 0x53257d8 0x5325d28
> > note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > note:   load permutation { 0 1 }
> > note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> > note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> > note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> > note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> > note:   stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> > note:   stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> > note:   load permutation { 0 1 0 1 }
> > 
> > To prevent this my initial thought was to add the temporary VEC_PERM_EXPR 
> > nodes
> > to the bst_map cache and increase their refcnt one more.  However since 
> > bst_map
> > is gated on scalar statements and these nodes have none we can't do that.
> > 
> > Instead I realized that load_map is really only a visited list at the top 
> > level.
> > So instead of returning the reference, we should return NULL.
> > 
> > What this means is that it will report that no replacement was found at that level.  
> > This is
> > fine since these VEC_PERM_EXPR are single use.  So while any other node 
> > is
> > an indication to use the cache, VEC_PERM_EXPR are an indication to avoid it.
> 
> I don't understand really.  Waiting for the other patch to be pushed so
> I can eventually have a look, but see below.
> 
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > 
> > Ok for master?
> > 
> > Thanks,
> > Tamar
> > 
> > gcc/ChangeLog:
> > 
> > PR tree-optimization/99220
> > * tree-vect-slp.c (optimize_load_redistribution_1): Don't use
> > VEC_PERM_EXPR in cache.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > PR tree-optimization/99220
> > * g++.dg/vect/pr99220.cc: New test.
> > 
> > --- inline copy of patch -- 
> > diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc 
> > b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > new file mode 100755
> > index 
> > ..ff3058832b742414202a8ada0a9dafc72c9a54aa
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { 
> > aarch64*-*-* } } } */
> > +
> > +class a {
> > +  float b;
> > +  float c;
> > +
> > +public:
> > +  a(float d, float e) : b(d), c(e) {}
> > +  a operator+(a d) { return a(b + d.b, c + d.c); }
> > +  a operator-(a d) { return a(b - d.b, c - d.c); }
> > +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> > +};
> > +long f;
> > +a *g;
> > +class {
> > +  a *h;
> > +  long i;
> > +  a *j;
> > +
> > +public:
> > +  void k() {
> > +a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
> > +g[i] = m + n;
> > +g[i + 1] = m - n;
> > +j[f] = o;
> > +  }
> > +} p;
> > +main() { p.k(); }
> > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> > index 
> > 

[Patch, fortran] PR99125 - [9/10/11 Regression] ICE: gimplification failed (gimplify.c:15068)

2021-02-24 Thread Paul Richard Thomas via Gcc-patches
This problem was caused by the compiler attempting to use 0 as an lvalue
and to assign 0 to it. Understandably, this upset the gimplifer quite a bit
:-) The fix is to use the ss_info string length for deferred length
character components, where the hidden string length component has been
used. The use of a constant as an lvalue is prevented by checking that the
expression string length is a variable.

Regtests on FC33/x86_64 - OK for all three branches?

Paul

Fortran: Fix for class defined operators [PR99125].

2021-02-23  Paul Thomas  

gcc/fortran
PR fortran/99125
* trans-array.c (gfc_conv_expr_descriptor): For deferred length
character components use the ss_info string length instead of
gfc_get_expr_charlen. Make sure that the deferred string length
is a variable before assigning to it. Otherwise use the expr.
* trans-expr.c (gfc_conv_string_length): Make sure that the
deferred string length is a variable before assigning to it.

gcc/testsuite/
PR fortran/99125
* gfortran.dg/alloc_deferred_comp_1.f90: New test.
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index c6725659093..e85d63c3539 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -7670,15 +7670,21 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr)
   /* Set the string_length for a character array.  */
   if (expr->ts.type == BT_CHARACTER)
 	{
-	  se->string_length =  gfc_get_expr_charlen (expr);
+	  if (deferred_array_component)
+	se->string_length = ss_info->string_length;
+	  else
+	se->string_length =  gfc_get_expr_charlen (expr);
+
 	  if (VAR_P (se->string_length)
 	  && expr->ts.u.cl->backend_decl == se->string_length)
 	tmp = ss_info->string_length;
 	  else
 	tmp = se->string_length;
 
-	  if (expr->ts.deferred)
+	  if (expr->ts.deferred && VAR_P (expr->ts.u.cl->backend_decl))
 	gfc_add_modify (&se->pre, expr->ts.u.cl->backend_decl, tmp);
+	  else
+	expr->ts.u.cl->backend_decl = tmp;
 	}
 
   /* If we have an array section, are assigning  or passing an array
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index e61492485b8..9d178360fc3 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -2471,7 +2471,7 @@ gfc_conv_string_length (gfc_charlen * cl, gfc_expr * expr, stmtblock_t * pblock)
 			 se.expr, build_zero_cst (TREE_TYPE (se.expr)));
   gfc_add_block_to_block (pblock, &se.pre);
 
-  if (cl->backend_decl)
+  if (cl->backend_decl && VAR_P (cl->backend_decl))
 gfc_add_modify (pblock, cl->backend_decl, se.expr);
   else
 cl->backend_decl = gfc_evaluate_now (se.expr, pblock);
! { dg-do run }
!
! Test the fix for PR99125, where the array reference in the print
! statement caused an ICE because the gimplifier complained about '0'
! being used as an lvalue.
!
! Contributed by Gerhard Steinmetz  
!
program p
   type t
  character(:), allocatable :: a(:)
   end type
   type(t) :: x
   character(8) :: c(3) = ['12 45 78','23 56 89','34 67 90']
   x%a = c
   if (any (x%a(2:3) .ne. ['23 56 89','34 67 90'])) stop 1
   if (any (x%a(2:3)(4:5) .ne. ['56','67'])) stop 2 ! Bizarrely this worked.
end


Re: [PATCH] match.pd, expand: Fold VCE from integer with [0, 1] range to bool into NOP_EXPR [PR80635]

2021-02-24 Thread Richard Biener
On Wed, 24 Feb 2021, Jakub Jelinek wrote:

> On Wed, Feb 24, 2021 at 11:50:10AM +0100, Richard Biener wrote:
> > > In the PR using NOP_EXPR has been discussed as one possibility and has 
> > > been
> > > rejected because at expansion it will emit a superfluous & 1 operation.
> > > I still think it is a good idea to use NOP_EXPR and so have changed
> > > expansion to not emit that & 1 operation in that case.  Both spots are
> > > done with tight conditions (bool only etc.), as I'd like to fix just
> > > that case and not introduce a wider general optimization, but perhaps
> > > later we could lift it and do a general range of arbitrary
> > > type_has_mode_precision_p to non-type_has_mode_precision_p with same
> > > TYPE_MODE case.
> > 
> > But it still is a pessimization.  VCE says there's no code to be
> > generated but NOP_EXPR says there is a conversion involved, even
> > if you later elide it via ssa_name_has_boolean_range.
> 
> I'm not convinced it is a pessimization.
> Because, a NOP_EXPR is something the compiler can optimize orders of
> magnitude more than VCE.
> To back that up by some random numbers,
> grep CASE_CONVERT: gimple-match.c | wc -l; grep VIEW_CONVERT_EXPR: 
> gimple-match.c | wc -l
> 417
> 18
> 
> > So I wonder what other optimizations are prevented here?
> 
> > Why does uninit warn with VCE but not with NOP_EXPR?  Or does the
> > warning disappear because of those other optimizations you mention?
> 
> The optimization that it prevents is in this particular case in tree-vrp.c
> (vrp_simplify_cond_using_ranges):
> 
>   if (!is_gimple_assign (def_stmt)
>   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
> return;
> so it punts on VIEW_CONVERT_EXPR, with NOP_EXPR it optimizes that:
>   _9 = (bool) maybe_a$4_7;
>   if (_9 != 0)
> into:
>   _9 = (bool) maybe_a$4_7;
>   if (maybe_a$4_7 != 0)
> 
> Now, if I apply my patch but manually disable this
> vrp_simplify_cond_using_ranges optimization, then the uninit warning is
> back, so on the uninit side it is not about VIEW_CONVERT_EXPR vs. NOP_EXPR,
> both are bad there, uninit wants the guarding condition to be
> that SSA_NAME and not some demotion cast thereof.
> We have:
>   # maybe_a$m_6 = PHI <_5(4), maybe_a$m_4(D)(6)>
>   # maybe_a$4_7 = PHI <1(4), 0(6)>
> ...
> One of:
>   _9 = VIEW_CONVERT_EXPR<bool>(maybe_a$4_7);
>   if (_9 != 0)
> or:
>   _9 = (bool) maybe_a$4_7;
>   if (_9 != 0)
> or:
>   if (maybe_a$4_7 != 0)
> followed by:
> goto ; [0.00%]
>   else
> goto ; [0.00%]
> ...
>[count: 0]:
>   set (maybe_a$m_6);
> and uninit wants to see that maybe_a$m_4(D) is not used if
> bb 11 is encountered.
> 
> So, if you are strongly opposed to the posted patch, I guess the fix can be
> (at least fixes the testcase; completely untested except for
> make check-c++-all RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
> dg.exp=pr80635*.C'
> ) following.
> But, I fear there will be dozens of other spots where we'll punt on
> optimizing when it is a VCE rather than NOP_EXPR.

Yes, I don't think the folding is desired.  Since it can't be applied
in all cases anyway (ssa_name_has_boolean_range), it's better if
passes learn to handle GIMPLE's V_C_E type-punning (we're still missing
something along (subreg:.. ) on RTL, but V_C_E with relaxed requirements
would do).

Note I'd rather do the reverse transformation to elide
bit-precision arithmetic to full mode precision one and then
communicate the not required truncations by using V_C_E
instead of NOPS from the mode precision arithmetic to the
bit-precision result.

Small comment about the patch below, which otherwise is OK:

> 2021-02-24  Jakub Jelinek  
> 
>   PR tree-optimization/80635
>   * tree-vrp.c (vrp_simplify_cond_using_ranges): Also handle
>   VIEW_CONVERT_EXPR if modes are the same, innerop is integral and
>   has mode precision.
> 
>   * g++.dg/warn/pr80635-1.C: New test.
>   * g++.dg/warn/pr80635-2.C: New test.
> 
> --- gcc/tree-vrp.c.jj 2021-02-24 12:56:58.573939572 +0100
> +++ gcc/tree-vrp.c2021-02-24 13:05:22.675326780 +0100
> @@ -4390,11 +4390,24 @@ vrp_simplify_cond_using_ranges (vr_value
>gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
>tree innerop;
>  
> -  if (!is_gimple_assign (def_stmt)
> -   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
> +  if (!is_gimple_assign (def_stmt))
>   return;
>  
> -  innerop = gimple_assign_rhs1 (def_stmt);
> +  switch (gimple_assign_rhs_code (def_stmt))
> + {
> + CASE_CONVERT:
> +   innerop = gimple_assign_rhs1 (def_stmt);
> +   break;
> + case VIEW_CONVERT_EXPR:
> +   innerop = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
> +   if (TYPE_MODE (TREE_TYPE (op0)) != TYPE_MODE (TREE_TYPE (innerop))
> +   || !INTEGRAL_TYPE_P (TREE_TYPE (innerop))
> +   || !type_has_mode_precision_p (TREE_TYPE (innerop)))

I think that !INTEGRAL_TYPE_P (TREE_TYPE (innerop)) is a sufficient
condition here.


Re: [PATCH] match.pd, expand: Fold VCE from integer with [0, 1] range to bool into NOP_EXPR [PR80635]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 24, 2021 at 11:50:10AM +0100, Richard Biener wrote:
> > In the PR using NOP_EXPR has been discussed as one possibility and has been
> > rejected because at expansion it will emit a superfluous & 1 operation.
> > I still think it is a good idea to use NOP_EXPR and so have changed
> > expansion to not emit that & 1 operation in that case.  Both spots are
> > done with tight conditions (bool only etc.), as I'd like to fix just
> > that case and not introduce a wider general optimization, but perhaps
> > later we could lift it and do a general range of arbitrary
> > type_has_mode_precision_p to non-type_has_mode_precision_p with same
> > TYPE_MODE case.
> 
> But it still is a pessimization.  VCE says there's no code to be
> generated but NOP_EXPR says there is a conversion involved, even
> if you later elide it via ssa_name_has_boolean_range.

I'm not convinced it is a pessimization.
Because, a NOP_EXPR is something the compiler can optimize orders of
magnitude more than VCE.
To back that up by some random numbers,
grep CASE_CONVERT: gimple-match.c | wc -l; grep VIEW_CONVERT_EXPR: 
gimple-match.c | wc -l
417
18

> So I wonder what other optimizations are prevented here?

> Why does uninit warn with VCE but not with NOP_EXPR?  Or does the
> warning disappear because of those other optimizations you mention?

The optimization that it prevents is in this particular case in tree-vrp.c
(vrp_simplify_cond_using_ranges):

  if (!is_gimple_assign (def_stmt)
  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
return;
so it punts on VIEW_CONVERT_EXPR, with NOP_EXPR it optimizes that:
  _9 = (bool) maybe_a$4_7;
  if (_9 != 0)
into:
  _9 = (bool) maybe_a$4_7;
  if (maybe_a$4_7 != 0)

Now, if I apply my patch but manually disable this
vrp_simplify_cond_using_ranges optimization, then the uninit warning is
back, so on the uninit side it is not about VIEW_CONVERT_EXPR vs. NOP_EXPR,
both are bad there, uninit wants the guarding condition to be
that SSA_NAME and not some demotion cast thereof.
We have:
  # maybe_a$m_6 = PHI <_5(4), maybe_a$m_4(D)(6)>
  # maybe_a$4_7 = PHI <1(4), 0(6)>
...
One of:
  _9 = VIEW_CONVERT_EXPR<bool>(maybe_a$4_7);
  if (_9 != 0)
or:
  _9 = (bool) maybe_a$4_7;
  if (_9 != 0)
or:
  if (maybe_a$4_7 != 0)
followed by:
goto ; [0.00%]
  else
goto ; [0.00%]
...
   [count: 0]:
  set (maybe_a$m_6);
and uninit wants to see that maybe_a$m_4(D) is not used if
bb 11 is encountered.

So, if you are strongly opposed to the posted patch, I guess the fix can be
(at least fixes the testcase; completely untested except for
make check-c++-all RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
dg.exp=pr80635*.C'
) following.
But, I fear there will be dozens of other spots where we'll punt on
optimizing when it is a VCE rather than NOP_EXPR.

2021-02-24  Jakub Jelinek  

PR tree-optimization/80635
* tree-vrp.c (vrp_simplify_cond_using_ranges): Also handle
VIEW_CONVERT_EXPR if modes are the same, innerop is integral and
has mode precision.

* g++.dg/warn/pr80635-1.C: New test.
* g++.dg/warn/pr80635-2.C: New test.

--- gcc/tree-vrp.c.jj   2021-02-24 12:56:58.573939572 +0100
+++ gcc/tree-vrp.c  2021-02-24 13:05:22.675326780 +0100
@@ -4390,11 +4390,24 @@ vrp_simplify_cond_using_ranges (vr_value
   gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
   tree innerop;
 
-  if (!is_gimple_assign (def_stmt)
- || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
+  if (!is_gimple_assign (def_stmt))
return;
 
-  innerop = gimple_assign_rhs1 (def_stmt);
+  switch (gimple_assign_rhs_code (def_stmt))
+   {
+   CASE_CONVERT:
+ innerop = gimple_assign_rhs1 (def_stmt);
+ break;
+   case VIEW_CONVERT_EXPR:
+ innerop = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
+ if (TYPE_MODE (TREE_TYPE (op0)) != TYPE_MODE (TREE_TYPE (innerop))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (innerop))
+ || !type_has_mode_precision_p (TREE_TYPE (innerop)))
+   return;
+ break;
+   default:
+ break;
+   }
 
   if (TREE_CODE (innerop) == SSA_NAME
  && !POINTER_TYPE_P (TREE_TYPE (innerop))
--- gcc/testsuite/g++.dg/warn/pr80635-1.C.jj2021-02-24 12:24:15.176834532 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-1.C   2021-02-24 12:24:15.176834532 
+0100
@@ -0,0 +1,46 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+using size_t = decltype (sizeof (1));
+inline void *operator new (size_t, void *p) { return p; }
+template
+struct optional
+{
+  optional () : m_dummy (), live (false) {}
+  void emplace () { new (_item) T (); live = true; }
+  ~optional () { if (live) m_item.~T (); }
+
+  union
+  {
+struct {} m_dummy;
+T m_item;
+  };
+  bool live;
+};
+
+extern int get ();
+extern void set (int);
+
+struct A
+{
+  A () : m (get ()) {}
+  ~A () 

Re: [PATCH] fold-const: Fix up ((1 << x) & y) != 0 folding for vectors [PR99225]

2021-02-24 Thread Richard Biener
On Wed, 24 Feb 2021, Jakub Jelinek wrote:

> Hi!
> 
> This optimization was written purely with scalar integers in mind,
> can work fine even with vectors, but we can't use build_int_cst but
> need to use build_one_cst instead.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.
 
> 2021-02-24  Jakub Jelinek  
> 
>   PR tree-optimization/99225
>   * fold-const.c (fold_binary_loc) : In (x & (1 << y)) != 0
>   to ((x >> y) & 1) != 0 simplifications use build_one_cst instead of
>   build_int_cst (..., 1).  Formatting fixes.
> 
>   * gcc.c-torture/compile/pr99225.c: New test.
> 
> --- gcc/fold-const.c.jj   2021-02-23 09:49:40.0 +0100
> +++ gcc/fold-const.c  2021-02-23 19:53:33.143763292 +0100
> @@ -12044,23 +12044,23 @@ fold_binary_loc (location_t loc, enum tr
> && integer_onep (TREE_OPERAND (arg00, 0)))
>   {
> tree tem = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (arg00),
> -   arg01, TREE_OPERAND (arg00, 1));
> +   arg01, TREE_OPERAND (arg00, 1));
> tem = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg0), tem,
> -  build_int_cst (TREE_TYPE (arg0), 1));
> +  build_one_cst (TREE_TYPE (arg0)));
> return fold_build2_loc (loc, code, type,
> -   fold_convert_loc (loc, TREE_TYPE (arg1), tem),
> -   arg1);
> +   fold_convert_loc (loc, TREE_TYPE (arg1),
> + tem), arg1);
>   }
> else if (TREE_CODE (arg01) == LSHIFT_EXPR
>  && integer_onep (TREE_OPERAND (arg01, 0)))
>   {
> tree tem = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (arg01),
> -   arg00, TREE_OPERAND (arg01, 1));
> +   arg00, TREE_OPERAND (arg01, 1));
> tem = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg0), tem,
> -  build_int_cst (TREE_TYPE (arg0), 1));
> +  build_one_cst (TREE_TYPE (arg0)));
> return fold_build2_loc (loc, code, type,
> -   fold_convert_loc (loc, TREE_TYPE (arg1), tem),
> -   arg1);
> +   fold_convert_loc (loc, TREE_TYPE (arg1),
> + tem), arg1);
>   }
>   }
>  
> --- gcc/testsuite/gcc.c-torture/compile/pr99225.c.jj  2021-02-23 
> 20:12:01.825464969 +0100
> +++ gcc/testsuite/gcc.c-torture/compile/pr99225.c 2021-02-23 
> 20:11:45.962640464 +0100
> @@ -0,0 +1,31 @@
> +/* PR tree-optimization/99225 */
> +
> +typedef int V __attribute__((vector_size (4 * sizeof (int))));
> +
> +void
> +foo (V *x)
> +{
> +  x[2] = (x[0] & (1 << x[1])) != 0;
> +}
> +
> +void
> +bar (V *x)
> +{
> +  x[2] = ((1 << x[1]) & x[0]) != 0;
> +}
> +
> +void
> +baz (V *x)
> +{
> +  V a = 1 << x[1];
> +  V b = a & x[0];
> +  x[2] = b != 0;
> +}
> +
> +void
> +qux (V *x)
> +{
> +  V a = 1 << x[1];
> +  V b = x[0] & a;
> +  x[2] = b != 0;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH] match.pd, expand: Fold VCE from integer with [0, 1] range to bool into NOP_EXPR [PR80635]

2021-02-24 Thread Richard Biener
On Wed, 24 Feb 2021, Jakub Jelinek wrote:

> Hi!
> 
> SRA creates a VCE from integer to bool and that VCE then prevents other
> optimizations or e.g. prevents the uninit pass from avoiding a false
> positive warning.
> 
> In the PR using NOP_EXPR has been discussed as one possibility and has been
> rejected because at expansion it will emit a superfluous & 1 operation.
> I still think it is a good idea to use NOP_EXPR and so have changed
> expansion to not emit that & 1 operation in that case.  Both spots are
> done with tight conditions (bool only etc.), as I'd like to fix just
> that case and not introduce a wider general optimization, but perhaps
> later we could lift it and do a general range of arbitrary
> type_has_mode_precision_p to non-type_has_mode_precision_p with same
> TYPE_MODE case.

But it still is a pessimization.  VCE says there's no code to be
generated but NOP_EXPR says there is a conversion involved, even
if you later elide it via ssa_name_has_boolean_range.

So I wonder what other optimizations are prevented here?

Why does uninit warn with VCE but not with NOP_EXPR?  Or does the
warning disappear because of those other optimizations you mention?


> On this particular case, the expr.c part isn't really needed, because we
> have an unsigned char SSA_NAME with values 0 or 1, VCE to bool and
> comparison of that bool against false in a condition, and with the
> match.pd change the VCE is simplified into NOP_EXPR and that is later
> folded into just comparison of the unsigned char value against 0.
> If I under the debugger without the match.pd change the VCE into NOP_EXPR
> at the start of TER, then without the expr.c change indeed & 1 is emitted
> but then combine removes it again (non-zero bits logic makes that possible),
> with the expr.c change we don't emit it at all, which is good because
> we can't rely on the combine managing to do it in all cases.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2021-02-24  Jakub Jelinek  
> 
>   PR tree-optimization/80635
>   * match.pd (view_convert (A) -> (bool) (A)): Simplify VCE of
>   ssa_name_has_boolean_range with integral type to bool.
>   * expr.c (expand_expr_real_2): Avoid REDUCE_BIT_FIELD for cast to
>   boolean if operand is ssa_name_has_boolean_range.
> 
>   * g++.dg/warn/pr80635-1.C: New test.
>   * g++.dg/warn/pr80635-2.C: New test.
> 
> --- gcc/match.pd.jj   2021-02-18 16:21:01.0 +0100
> +++ gcc/match.pd  2021-02-23 13:16:15.095416289 +0100
> @@ -3316,7 +3316,13 @@ (define_operator_list COND_TERNARY
>(view_convert @0)
>(if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
> && (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE 
> (@0)))
> -   && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0)))
> +   && (TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
> +|| (TREE_CODE (type) == BOOLEAN_TYPE
> +&& TREE_CODE (@0) == SSA_NAME
> +&& TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE
> +&& TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (@0))
> +&& type_has_mode_precision_p (TREE_TYPE (@0))
> +&& ssa_name_has_boolean_range (@0))))
> (convert @0)))
>  
>  /* Strip inner integral conversions that do not change precision or size, or
> --- gcc/expr.c.jj 2021-02-02 10:01:26.483903572 +0100
> +++ gcc/expr.c2021-02-23 13:10:26.400323532 +0100
> @@ -8790,6 +8790,15 @@ expand_expr_real_2 (sepops ops, rtx targ
> && GET_CODE (op0) == SUBREG)
>   SUBREG_PROMOTED_VAR_P (op0) = 0;
>  
> +   /* Don't reduce to boolean range if we know the operand
> +  already has a boolean range.  */
> +   if (reduce_bit_field
> +   && TREE_CODE (type) == BOOLEAN_TYPE
> +   && TREE_CODE (treeop0) == SSA_NAME
> +   && TREE_CODE (TREE_TYPE (treeop0)) != BOOLEAN_TYPE
> +   && type_has_mode_precision_p (TREE_TYPE (treeop0))
> +   && ssa_name_has_boolean_range (treeop0))
> + return op0;
> return REDUCE_BIT_FIELD (op0);
>   }
>  
> --- gcc/testsuite/g++.dg/warn/pr80635-1.C.jj  2021-02-23 13:17:35.398516088 
> +0100
> +++ gcc/testsuite/g++.dg/warn/pr80635-1.C 2021-02-23 13:19:21.712324315 
> +0100
> @@ -0,0 +1,46 @@
> +// PR tree-optimization/80635
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -Wmaybe-uninitialized" }
> +
> +using size_t = decltype (sizeof (1));
> +inline void *operator new (size_t, void *p) { return p; }
> +template
> +struct optional
> +{
> +  optional () : m_dummy (), live (false) {}
> +  void emplace () { new (_item) T (); live = true; }
> +  ~optional () { if (live) m_item.~T (); }
> +
> +  union
> +  {
> +struct {} m_dummy;
> +T m_item;
> +  };
> +  bool live;
> +};
> +
> +extern int get ();
> +extern void set (int);
> +
> +struct A
> +{
> +  A () : m (get ()) {}
> +  ~A () { set (m); } // { dg-bogus "may be used uninitialized in this 

[PATCH] fold-const: Fix up ((1 << x) & y) != 0 folding for vectors [PR99225]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

This optimization was written purely with scalar integers in mind.  It
can work fine even with vectors, but we can't use build_int_cst and
need to use build_one_cst instead.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  

PR tree-optimization/99225
* fold-const.c (fold_binary_loc) : In (x & (1 << y)) != 0
to ((x >> y) & 1) != 0 simplifications use build_one_cst instead of
build_int_cst (..., 1).  Formatting fixes.

* gcc.c-torture/compile/pr99225.c: New test.

--- gcc/fold-const.c.jj 2021-02-23 09:49:40.0 +0100
+++ gcc/fold-const.c2021-02-23 19:53:33.143763292 +0100
@@ -12044,23 +12044,23 @@ fold_binary_loc (location_t loc, enum tr
  && integer_onep (TREE_OPERAND (arg00, 0)))
{
  tree tem = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (arg00),
- arg01, TREE_OPERAND (arg00, 1));
+ arg01, TREE_OPERAND (arg00, 1));
  tem = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg0), tem,
-build_int_cst (TREE_TYPE (arg0), 1));
+build_one_cst (TREE_TYPE (arg0)));
  return fold_build2_loc (loc, code, type,
- fold_convert_loc (loc, TREE_TYPE (arg1), tem),
- arg1);
+ fold_convert_loc (loc, TREE_TYPE (arg1),
+   tem), arg1);
}
  else if (TREE_CODE (arg01) == LSHIFT_EXPR
   && integer_onep (TREE_OPERAND (arg01, 0)))
{
  tree tem = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (arg01),
- arg00, TREE_OPERAND (arg01, 1));
+ arg00, TREE_OPERAND (arg01, 1));
  tem = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (arg0), tem,
-build_int_cst (TREE_TYPE (arg0), 1));
+build_one_cst (TREE_TYPE (arg0)));
  return fold_build2_loc (loc, code, type,
- fold_convert_loc (loc, TREE_TYPE (arg1), tem),
- arg1);
+ fold_convert_loc (loc, TREE_TYPE (arg1),
+   tem), arg1);
}
}
 
--- gcc/testsuite/gcc.c-torture/compile/pr99225.c.jj2021-02-23 
20:12:01.825464969 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr99225.c   2021-02-23 
20:11:45.962640464 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/99225 */
+
+typedef int V __attribute__((vector_size (4 * sizeof (int))));
+
+void
+foo (V *x)
+{
+  x[2] = (x[0] & (1 << x[1])) != 0;
+}
+
+void
+bar (V *x)
+{
+  x[2] = ((1 << x[1]) & x[0]) != 0;
+}
+
+void
+baz (V *x)
+{
+  V a = 1 << x[1];
+  V b = a & x[0];
+  x[2] = b != 0;
+}
+
+void
+qux (V *x)
+{
+  V a = 1 << x[1];
+  V b = x[0] & a;
+  x[2] = b != 0;
+}

Jakub



[PATCH] match.pd, expand: Fold VCE from integer with [0, 1] range to bool into NOP_EXPR [PR80635]

2021-02-24 Thread Jakub Jelinek via Gcc-patches
Hi!

SRA creates a VCE from integer to bool and that VCE then prevents other
optimizations or e.g. prevents the uninit pass from avoiding a false
positive warning.

In the PR using NOP_EXPR has been discussed as one possibility and has been
rejected because at expansion it will emit a superfluous & 1 operation.
I still think it is a good idea to use NOP_EXPR and so have changed
expansion to not emit that & 1 operation in that case.  Both spots are
done with tight conditions (bool only etc.), as I'd like to fix just
that case and not introduce a wider general optimization, but perhaps
later we could lift it and do a general range of arbitrary
type_has_mode_precision_p to non-type_has_mode_precision_p with same
TYPE_MODE case.

On this particular case, the expr.c part isn't really needed, because we
have an unsigned char SSA_NAME with values 0 or 1, VCE to bool and
comparison of that bool against false in a condition, and with the
match.pd change the VCE is simplified into NOP_EXPR and that is later
folded into just comparison of the unsigned char value against 0.
If, under the debugger and without the match.pd change, I turn the VCE into NOP_EXPR
at the start of TER, then without the expr.c change indeed & 1 is emitted
but then combine removes it again (non-zero bits logic makes that possible),
with the expr.c change we don't emit it at all, which is good because
we can't rely on the combine managing to do it in all cases.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  

PR tree-optimization/80635
* match.pd (view_convert (A) -> (bool) (A)): Simplify VCE of
ssa_name_has_boolean_range with integral type to bool.
* expr.c (expand_expr_real_2): Avoid REDUCE_BIT_FIELD for cast to
boolean if operand is ssa_name_has_boolean_range.

* g++.dg/warn/pr80635-1.C: New test.
* g++.dg/warn/pr80635-2.C: New test.

--- gcc/match.pd.jj 2021-02-18 16:21:01.0 +0100
+++ gcc/match.pd2021-02-23 13:16:15.095416289 +0100
@@ -3316,7 +3316,13 @@ (define_operator_list COND_TERNARY
   (view_convert @0)
   (if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
&& (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
-   && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0)))
+   && (TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+  || (TREE_CODE (type) == BOOLEAN_TYPE
+  && TREE_CODE (@0) == SSA_NAME
+  && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE
+  && TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (@0))
+  && type_has_mode_precision_p (TREE_TYPE (@0))
+  && ssa_name_has_boolean_range (@0))))
(convert @0)))
 
 /* Strip inner integral conversions that do not change precision or size, or
--- gcc/expr.c.jj   2021-02-02 10:01:26.483903572 +0100
+++ gcc/expr.c  2021-02-23 13:10:26.400323532 +0100
@@ -8790,6 +8790,15 @@ expand_expr_real_2 (sepops ops, rtx targ
  && GET_CODE (op0) == SUBREG)
SUBREG_PROMOTED_VAR_P (op0) = 0;
 
+ /* Don't reduce to boolean range if we know the operand
+already has a boolean range.  */
+ if (reduce_bit_field
+ && TREE_CODE (type) == BOOLEAN_TYPE
+ && TREE_CODE (treeop0) == SSA_NAME
+ && TREE_CODE (TREE_TYPE (treeop0)) != BOOLEAN_TYPE
+ && type_has_mode_precision_p (TREE_TYPE (treeop0))
+ && ssa_name_has_boolean_range (treeop0))
+   return op0;
  return REDUCE_BIT_FIELD (op0);
}
 
--- gcc/testsuite/g++.dg/warn/pr80635-1.C.jj2021-02-23 13:17:35.398516088 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-1.C   2021-02-23 13:19:21.712324315 
+0100
@@ -0,0 +1,46 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+using size_t = decltype (sizeof (1));
+inline void *operator new (size_t, void *p) { return p; }
+template <typename T>
+struct optional
+{
+  optional () : m_dummy (), live (false) {}
+  void emplace () { new (&m_item) T (); live = true; }
+  ~optional () { if (live) m_item.~T (); }
+
+  union
+  {
+struct {} m_dummy;
+T m_item;
+  };
+  bool live;
+};
+
+extern int get ();
+extern void set (int);
+
+struct A
+{
+  A () : m (get ()) {}
+  ~A () { set (m); }   // { dg-bogus "may be used uninitialized in this 
function" }
+
+  int m;
+};
+
+struct B
+{
+  B ();
+  ~B ();
+};
+
+void func ()
+{
+  optional<A> maybe_a;
+  optional<B> maybe_b;
+
+  maybe_a.emplace ();
+  maybe_b.emplace ();
+}
--- gcc/testsuite/g++.dg/warn/pr80635-2.C.jj2021-02-23 13:17:38.426482145 
+0100
+++ gcc/testsuite/g++.dg/warn/pr80635-2.C   2021-02-23 13:27:34.215803360 
+0100
@@ -0,0 +1,31 @@
+// PR tree-optimization/80635
+// { dg-do compile { target c++17 } }
+// { dg-options "-O2 -Wmaybe-uninitialized" }
+
+#include <optional>
+
+extern int get ();
+extern void set (int);
+
+struct A
+{
+  A () : m (get ()) {}

[patch] Revert: "Don't build insn-extract.o with rtl checking"

2021-02-24 Thread Matthias Klose
Revert: "Don't build insn-extract.o with rtl checking".

PR target/98746 is now fixed, compilation is now below 100MB from 8GB.

Approved on irc by Richard Biener.

Matthias

--- a/gcc/genextract.c
+++ b/gcc/genextract.c
@@ -365,8 +365,6 @@ print_header (void)
 #define IN_TARGET_CODE 1\n\
 #include \"config.h\"\n\
 #include \"system.h\"\n\
-#undef ENABLE_RTL_CHECKING\n\
-#undef ENABLE_RTL_FLAG_CHECKING\n\
 #include \"coretypes.h\"\n\
 #include \"tm.h\"\n\
 #include \"rtl.h\"\n\



[PATCH] c/99224 - avoid ICEing on invalid __builtin_next_arg

2021-02-24 Thread Richard Biener
This avoids crashes with __builtin_next_arg on non-parameters.  For
the specific testcase we arrive with an anonymous SSA_NAME so that
SSA_NAME_VAR becomes NULL and we crash.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-02-24  Richard Biener  

PR c/99224
* builtins.c (fold_builtin_next_arg): Avoid NULL arg.

* gcc.dg/pr99224.c: New testcase.
---
 gcc/builtins.c | 3 ++-
 gcc/testsuite/gcc.dg/pr99224.c | 6 ++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr99224.c

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 0aed008687c..42150cee0bd 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -12597,7 +12597,8 @@ fold_builtin_next_arg (tree exp, bool va_start_p)
   arg = CALL_EXPR_ARG (exp, 0);
 }
 
-  if (TREE_CODE (arg) == SSA_NAME)
+  if (TREE_CODE (arg) == SSA_NAME
+  && SSA_NAME_VAR (arg))
 arg = SSA_NAME_VAR (arg);
 
   /* We destructively modify the call to be __builtin_va_start (ap, 0)
diff --git a/gcc/testsuite/gcc.dg/pr99224.c b/gcc/testsuite/gcc.dg/pr99224.c
new file mode 100644
index 000..f6e9ac8eba3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr99224.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+
+void f (char *c, ...)
+{
+  __builtin_next_arg (*c); /* { dg-warning "not last named argument" } */
+}
-- 
2.26.2


Re: add rv64im{,c,fc} multilibs

2021-02-24 Thread Alexandre Oliva
On Feb 23, 2021, Jim Wilson  wrote:

> If we add default multilibs for you, then to be fair, we need to add
> default multilibs for other people that ask, and before long we are trying
> to build hundreds or maybe even thousands of multilibs by default which is
> unworkable.

*nod*, it's a very familiar issue to me, I know where that's coming
from, no worries.  I expected the change would be turned down, and for
good reason, unless there was say an emerging growth of adoption on
those multilibs, which I wasn't aware of.  The expectation didn't stop
me from offering the patch just in case, that's all.

> People that want a different set can define their own, and we have
> made it easy for people to define their own sets of multilibs as Kito
> pointed out.

*nod*, thanks,

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist GNU Toolchain Engineer
Vim, Vi, Voltei pro Emacs -- GNUlius Caesar


Re: [PATCH] clear more front end VLA bounds from IL (PR 97172)

2021-02-24 Thread Richard Biener via Gcc-patches
On Tue, Feb 23, 2021 at 8:53 PM Martin Sebor via Gcc-patches
 wrote:
>
> Adding attribute access to declarations of functions that take
> VLA arguments relies on the front end adding attribute "arg spec"
> to each VLA parameter.  Like the VLA bounds in attribute access,
> the same VLA bounds in attribute "arg spec" can cause trouble
> during LTO streaming which expects front end trees to have been
> either gimplified or removed.  For some reason, with arg spec
> the LTO abort happens only with -fpic/-shared and so it escaped
> testing in the prior fixes.
>
> The attached patch clears the bounds from attribute "arg spec."

OK.

Richard.

> Martin


Re: [PATCH]middle-end slp: fix accidental resource re-use of slp_tree (PR99220)

2021-02-24 Thread Richard Biener
On Tue, 23 Feb 2021, Tamar Christina wrote:

> Hi Richi,
> 
> The attached testcase shows a bug where two nodes end up with the same 
> pointer.
> During the loop that analyzes all the instances
> in optimize_load_redistribution_1 we do
> 
>   if (value)
> {
>   SLP_TREE_REF_COUNT (value)++;
>   SLP_TREE_CHILDREN (root)[i] = value;
>   vect_free_slp_tree (node);
> }
> 
> when doing a replacement.  When this is done and the refcount for the node
> reaches 0, the node is removed, which allows the libc to return the pointer
> again in the next call to new, which it does..
> 
> First instance
> 
> note:   node 0x5325f48 (max_nunits=1, refcnt=2)
> note:   op: VEC_PERM_EXPR
> note:   { }
> note:   lane permutation { 0[0] 1[1] 0[2] 1[3] }
> note:   children 0x5325db0 0x5325200
> 
> Second instance
> 
> note:   node 0x5325f48 (max_nunits=1, refcnt=1)
> note:   op: VEC_PERM_EXPR
> note:   { }
> note:   lane permutation { 0[0] 1[1] }
> note:   children 0x53255b8 0x5325530
> 
> This will end up with the illegal construction of
> 
> note:   node 0x53258e8 (max_nunits=2, refcnt=2)
> note:   op template: slp_patt_57 = .COMPLEX_MUL (_16, _16);
> note:   stmt 0 _16 = _14 - _15;
> note:   stmt 1 _23 = _17 + _22;
> note:   children 0x53257d8 0x5325d28
> note:   node 0x53257d8 (max_nunits=2, refcnt=3)
> note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> note:   load permutation { 0 1 }
> note:   node 0x5325d28 (max_nunits=2, refcnt=8)
> note:   op template: l$b_4 = MEM[(const struct a &)_3].b;
> note:   stmt 0 l$b_4 = MEM[(const struct a &)_3].b;
> note:   stmt 1 l$c_5 = MEM[(const struct a &)_3].c;
> note:   stmt 2 l$b_4 = MEM[(const struct a &)_3].b;
> note:   stmt 3 l$c_5 = MEM[(const struct a &)_3].c;
> note:   load permutation { 0 1 0 1 }
> 
> To prevent this my initial thought was to add the temporary VEC_PERM_EXPR 
> nodes
> to the bst_map cache and increase their refcnt one more.  However since 
> bst_map
> is gated on scalar statements and these nodes have none we can't do that.
> 
> Instead I realized that load_map is really only a visited list at the top 
> level.
> So instead of returning the reference, we should return NULL.
> 
> What this means is that no replacement was found at that level.  This 
> is
> fine since these VEC_PERM_EXPR are single use.  So while any other node is
> an indication to use the cache, VEC_PERM_EXPR are an indication to avoid it.

I don't understand really.  Waiting for the other patch to be pushed so
I can eventually have a look, but see below.

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/99220
>   * tree-vect-slp.c (optimize_load_redistribution_1): Don't use
>   VEC_PERM_EXPR in cache.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/99220
>   * g++.dg/vect/pr99220.cc: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/vect/pr99220.cc 
> b/gcc/testsuite/g++.dg/vect/pr99220.cc
> new file mode 100755
> index 
> ..ff3058832b742414202a8ada0a9dafc72c9a54aa
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/pr99220.cc
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a" { target { aarch64*-*-* 
> } } } */
> +
> +class a {
> +  float b;
> +  float c;
> +
> +public:
> +  a(float d, float e) : b(d), c(e) {}
> +  a operator+(a d) { return a(b + d.b, c + d.c); }
> +  a operator-(a d) { return a(b - d.b, c - d.c); }
> +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> +};
> +long f;
> +a *g;
> +class {
> +  a *h;
> +  long i;
> +  a *j;
> +
> +public:
> +  void k() {
> +a l = h[0], m = g[i], n = l * g[1], o = l * j[8];
> +g[i] = m + n;
> +g[i + 1] = m - n;
> +j[f] = o;
> +  }
> +} p;
> +main() { p.k(); }
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 
> 605873714a5cafaaf822f61f1f769f96b3876694..e631463be8fc5b2de355e674a9c96665beb9516c
>  100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2292,7 +2292,12 @@ optimize_load_redistribution_1 
> (scalar_stmts_to_slp_tree_map_t *bst_map,
>   slp_tree root)
>  {
>if (slp_tree *leader = load_map->get (root))
> -return *leader;
> +{
> +  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
> + return NULL;

But this will then only optimize the first occurrence.  Wouldn't it be
better to increase the refcount at

  load_map->put (root, node);

and walk load_map at the end, releasing refs owned by it like we do
for bst_map?

> +  else
> + return *leader;
> +}
>  
>

Re: [PATCH v2] middle-end slp: fix sharing of SLP only patterns.

2021-02-24 Thread Richard Biener
On Tue, 23 Feb 2021, Tamar Christina wrote:

> Hi Richi,
> 
> The attached testcase ICEs due to a couple of issues.
> In the testcase you have two SLP instances that share the majority of their
> definition with each other.  One tree defines a COMPLEX_MUL sequence and the
> other tree a COMPLEX_FMA.
> 
> The ICE happens because:
> 
> 1. the refcounts are wrong, in particular the FMA case doesn't correctly count
> the references for the COMPLEX_MUL that it consumes.
> 
> 2. when the FMA is created it incorrectly assumes it can just tear apart the 
> MUL
> node that it's consuming.  This is wrong and should only be done when there are
> no more uses of the node, in which case the vector only pattern is no longer
> relevant.
> 
> To fix the last part the SLP only pattern reset code was moved into
> vect_free_slp_tree which results in cleaner code.  I also think it does belong
> there since that function knows when there are no more uses of the node and so
> the pattern should be unmarked, so when the vectorizer is inspecting the 
> BB
> it doesn't find the now invalid vector only patterns.
> 
> The patch also clears the SLP_TREE_REPRESENTATIVE when stores are removed such
> that we don't hit an error later trying to free the stmt_vec_info again.
> 
> Lastly it also tweaks the results of whether a pattern has been detected or 
> not
> to return true when another SLP instance has created a pattern that is only 
> used
> by a different instance (due to the trees being unshared).
> 
> Instead of ICEing this code now produces
> 
> adrpx1, .LANCHOR0
> add x2, x1, :lo12:.LANCHOR0
> moviv1.2s, 0
> mov w0, 0
> ldr x4, [x1, #:lo12:.LANCHOR0]
> ldrsw   x3, [x2, 16]
> ldr x1, [x2, 8]
> ldrsw   x2, [x2, 20]
> ldr d0, [x4]
> ldr d2, [x1, x3, lsl 3]
> fcmla   v2.2s, v0.2s, v0.2s, #0
> fcmla   v2.2s, v0.2s, v0.2s, #90
> str d2, [x1, x3, lsl 3]
> fcmla   v1.2s, v0.2s, v0.2s, #0
> fcmla   v1.2s, v0.2s, v0.2s, #90
> str d1, [x1, x2, lsl 3]
> ret
> 
> PS. This testcase actually shows that the codegen we get in these cases is not
> optimal. It should generate a MUL + ADD instead of MUL + FMA.
> 
> But that's for GCC 12.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/99149
>   * tree-vect-slp-patterns.c (vect_detect_pair_op): Don't recreate the
>   buffer.
>   (vect_slp_reset_pattern): Remove.
>   (complex_fma_pattern::matches): Remove call to vect_slp_reset_pattern.
>   (complex_mul_pattern::build, complex_fma_pattern::build,
>   complex_fms_pattern::build): Fix ref counts.
>   * tree-vect-slp.c (vect_free_slp_tree): Undo SLP only pattern relevancy
>   when node is being deleted.
>   (vect_match_slp_patterns_2): Correct result of cache hit on patterns.
>   (vect_schedule_slp): Invalidate SLP_TREE_REPRESENTATIVE of removed
>   stores.
>   * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize value.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/99149
>   * g++.dg/vect/pr99149.cc: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/vect/pr99149.cc 
> b/gcc/testsuite/g++.dg/vect/pr99149.cc
> new file mode 100755
> index 
> ..902a26f576fcc79d2802bec093668674cca1c84f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/pr99149.cc
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-slp-all" { 
> target { aarch64*-*-* } } } */
> +
> +class a {
> +  float b;
> +  float c;
> +
> +public:
> +  a(float d, float e) : b(d), c(e) {}
> +  a operator+(a d) { return a(b + d.b, c + d.c); }
> +  a operator*(a d) { return a(b * b - c * c, b * c + c * d.b); }
> +};
> +int f, g;
> +class {
> +  a *h;
> +  a *i;
> +
> +public:
> +  void j() {
> +a k = h[0], l = i[g], m = k * i[f];
> +i[g] = l + m;
> +i[f] = m;
> +  }
> +} n;
> +main() { n.j(); }
> +
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "slp2" } } */
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_FMA" 1 "slp2" } } */
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> f0817da9f622d22e3df2e30410d1cf610b4ffa1d..1e2769662a54229ab8e24390f97dfe206f17ab57
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -407,9 +407,8 @@ vect_detect_pair_op (slp_tree node1, slp_tree node2, 
> lane_permutation_t ,
>  
>if (result != CMPLX_NONE && ops != NULL)
>  {
> -  ops->create (2);
> -  ops->quick_push (node1);
> -  ops->quick_push (node2);
> +  ops->safe_push (node1);
> +  ops->safe_push (node2);
>  }
>return result;
>  }
> @@ -1090,15 +1089,17 @@ complex_mul_pattern::build (vec_info