[PATCH] arm: subdivide the type attribute "alu_shift_imm"

2020-09-28 Thread Qian Jianhua
The type attribute "alu_shift_imm" is subdivided into
"alu_shift_imm_lsl_1to4" and "alu_shift_imm_other", to accommodate
optimizations of some microarchitectures.

Here is the detailed discussion:
https://gcc.gnu.org/pipermail/gcc/2020-September/233594.html

ChangeLog:
2020-09-29  Qian Jianhua

gcc/
* config/arm/types.md (define_attr "autodetect_type"): New.
(define_attr "type"): Subdivide alu_shift_imm.
* config/arm/common.md: New file.
* config/aarch64/predicates.md: Include common.md.
* config/arm/predicates.md: Include common.md.
* config/aarch64/aarch64.md (*add__): Set autodetect_type.
(*add__si_uxtw): Likewise.
(*sub__): Likewise.
(*sub__si_uxtw): Likewise.
(*neg__2): Likewise.
(*neg__si2_uxtw): Likewise.
* config/arm/arm.md (*addsi3_carryin_shift): Likewise.
(add_not_shift_cin): Likewise.
(*subsi3_carryin_shift): Likewise.
(*subsi3_carryin_shift_alt): Likewise.
(*rsbsi3_carryin_shift): Likewise.
(*rsbsi3_carryin_shift_alt): Likewise.
(*arm_shiftsi3): Likewise.
(*_multsi): Likewise.
(*_shiftsi): Likewise.
(subsi3_carryin): Set new type.
(*if_arith_move): Set new type.
(*if_move_arith): Set new type.
(define_attr "core_cycles"): Use new type.
* config/arm/arm-fixed.md (arm_ssatsihi_shift): Set autodetect_type.
* config/arm/thumb2.md (*orsi_not_shiftsi_si): Likewise.
(*thumb2_shiftsi3_short): Set new type.
* config/aarch64/falkor.md (falkor_alu_1_xyz): Use new type.
* config/aarch64/saphira.md (saphira_alu_1_xyz): Likewise.
* config/aarch64/thunderx.md (thunderx_arith_shift): Likewise.
* config/aarch64/thunderx2t99.md (thunderx2t99_alu_shift): Likewise.
* config/aarch64/thunderx3t110.md (thunderx3t110_alu_shift): Likewise.
(thunderx3t110_alu_shift1): Likewise.
* config/aarch64/tsv110.md (tsv110_alu_shift): Likewise.
* config/arm/arm1020e.md (1020alu_shift_op): Likewise.
* config/arm/arm1026ejs.md (alu_shift_op): Likewise.
* config/arm/arm1136jfs.md (11_alu_shift_op): Likewise.
* config/arm/arm926ejs.md (9_alu_op): Likewise.
* config/arm/cortex-a15.md (cortex_a15_alu_shift): Likewise.
* config/arm/cortex-a17.md (cortex_a17_alu_shiftimm): Likewise.
* config/arm/cortex-a5.md (cortex_a5_alu_shift): Likewise.
* config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise.
* config/arm/cortex-a57.md (cortex_a57_alu_shift): Likewise.
* config/arm/cortex-a7.md (cortex_a7_alu_shift): Likewise.
* config/arm/cortex-a8.md (cortex_a8_alu_shift): Likewise.
* config/arm/cortex-a9.md (cortex_a9_dp_shift): Likewise.
* config/arm/cortex-m4.md (cortex_m4_alu): Likewise.
* config/arm/cortex-m7.md (cortex_m7_alu_shift): Likewise.
* config/arm/cortex-r4.md (cortex_r4_alu_shift): Likewise.
* config/arm/exynos-m1.md (exynos_m1_alu_shift): Likewise.
* config/arm/fa526.md (526_alu_shift_op): Likewise.
* config/arm/fa606te.md (606te_alu_op): Likewise.
* config/arm/fa626te.md (626te_alu_shift_op): Likewise.
* config/arm/fa726te.md (726te_alu_shift_op): Likewise.
* config/arm/fmp626.md (mp626_alu_shift_op): Likewise.
* config/arm/marvell-pj4.md (pj4_shift): Likewise.
(pj4_shift_conds): Likewise.
(pj4_alu_shift): Likewise.
(pj4_alu_shift_conds): Likewise.
* config/arm/xgene1.md (xgene1_alu): Likewise.
* config/arm/arm.c (xscale_sched_adjust_cost): Likewise.


Test Results:
* Bootstrap on aarch64 --- [OK]
* Regression tests on aarch64 --- [OK]


---
 gcc/config/aarch64/aarch64.md   | 12 ++---
 gcc/config/aarch64/falkor.md|  2 +-
 gcc/config/aarch64/predicates.md|  2 +
 gcc/config/aarch64/saphira.md   |  2 +-
 gcc/config/aarch64/thunderx.md  |  2 +-
 gcc/config/aarch64/thunderx2t99.md  |  2 +-
 gcc/config/aarch64/thunderx3t110.md |  4 +-
 gcc/config/aarch64/tsv110.md|  2 +-
 gcc/config/arm/arm-fixed.md |  2 +-
 gcc/config/arm/arm.c|  3 +-
 gcc/config/arm/arm.md   | 39 ++--
 gcc/config/arm/arm1020e.md  |  2 +-
 gcc/config/arm/arm1026ejs.md|  2 +-
 gcc/config/arm/arm1136jfs.md|  2 +-
 gcc/config/arm/arm926ejs.md |  2 +-
 gcc/config/arm/common.md| 37 +++
 gcc/config/arm/cortex-a15.md|  2 +-
 gcc/config/arm/cortex-a17.md|  2 +-
 gcc/config/arm/cortex-a5.md |  2 +-
 gcc/config/arm/cortex-a53.md|  2 +-
 gcc/config/arm/cortex-a57.md|  2 +-
 gcc/config/arm/cortex-a7.md |  2 +-
 gcc/config/arm/cortex-a8.md |  2 +-
 gcc/config/arm/cortex-a9.md |  2 +-
 gcc/config/arm/cortex-m4.md |  2 +-
 

[PATCH] Fix GCC 10+ build failure with zstd version 1.2.0 or older.

2020-09-28 Thread Jim Wilson
Extends the configure check for zstd.h to also verify the zstd version,
since gcc requires features that only exist in 1.3.0 and newer.  Without
this patch we get a build error for lto-compress.c when using an old zstd
version.
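
For reference, zstd.h computes the version number the new check compares
against as MAJOR*100*100 + MINOR*100 + RELEASE (shown here reformatted):

    /* From zstd.h: */
    #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 \
                                 + ZSTD_VERSION_MINOR *100   \
                                 + ZSTD_VERSION_RELEASE)
    /* so 1.3.0 => 1*10000 + 3*100 + 0 == 10300, the cutoff used below.  */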

Tested with builds using zstd 0.5.1, 1.2.0, 1.3.0, and 1.3.3, and checking
to see whether zstd was enabled for the build or not.

OK?

Jim

gcc/
PR 97183
* configure.ac (gcc_cv_header_zstd_h): Check ZSTD_VERSION_NUMBER.
* configure: Regenerated.
---
 gcc/configure| 11 ---
 gcc/configure.ac |  7 ++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/gcc/configure b/gcc/configure
index 33a3e34029f..b05a3714fb2 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -10022,9 +10022,14 @@ $as_echo_n "checking for zstd.h... " >&6; }
 if ${gcc_cv_header_zstd_h+:} false; then :
   $as_echo_n "(cached) " >&6
 else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  # We require version 1.3.0 or later.  This is the first version that has
+# ZSTD_getFrameContentSize.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
#include <zstd.h>
+#if ZSTD_VERSION_NUMBER < 10300
+#error "need zstd 1.3.0 or better"
+#endif
 int
 main ()
 {
@@ -19013,7 +19018,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19016 "configure"
+#line 19021 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19119,7 +19124,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19122 "configure"
+#line 19127 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 975f6d97c4b..f5612161dcd 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -1376,8 +1376,13 @@ LDFLAGS="$LDFLAGS $ZSTD_LDFLAGS"
 
 AC_MSG_CHECKING(for zstd.h)
 AC_CACHE_VAL(gcc_cv_header_zstd_h,
+# We require version 1.3.0 or later.  This is the first version that has
+# ZSTD_getFrameContentSize.
 [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
-[[#include <zstd.h>]])],
+[[#include <zstd.h>
+#if ZSTD_VERSION_NUMBER < 10300
+#error "need zstd 1.3.0 or better"
+#endif]])],
   [gcc_cv_header_zstd_h=yes],
   [gcc_cv_header_zstd_h=no])])
 AC_MSG_RESULT($gcc_cv_header_zstd_h)
-- 
2.17.1



Re: [PATCH] RISC-V/libgcc: Use `-fasynchronous-unwind-tables' for LIB2_DIVMOD_FUNCS

2020-09-28 Thread Maciej W. Rozycki
Hi Jim,

> On Sun, Aug 30, 2020 at 11:39 PM Kito Cheng  wrote
> > Hi Maciej:
> > LGTM, thanks for your patch!
> 
> I don't see this patch in the FSF GCC tree.  Maciej are you going to
> commit it?  Or do you want us to commit it for you?

 Since my departure from WDC I have been largely away, travelling and 
doing all kinds of personal stuff.  I hoped someone would pick it up and 
make some progress with the generic change I proposed earlier:



which would make the RISC-V-specific hack redundant, but clearly no one 
could be bothered enough.  I have pushed this change then so as not to 
hinder RISC-V support in GCC.

 Thanks, Kito, for your review!

  Maciej


[committed] analyzer: remove unused field

2020-09-28 Thread David Malcolm via Gcc-patches
I added this field (and the struct itself) in the rewrite of region and
value-handling (808f4dfeb3a95f50f15e71148e5c1067f90a126d), but the field
was never used.

Found by cppcheck.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as 20d16d61dd22a9bfb66d5c4a383d193037e8f16d.

gcc/analyzer/ChangeLog:
* diagnostic-manager.cc (null_assignment_sm_context::m_visitor):
Remove unused field.
---
 gcc/analyzer/diagnostic-manager.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc
index 13dd3da606f..cb95a95ff0b 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -931,7 +931,6 @@ struct null_assignment_sm_context : public sm_context
   const program_state *m_new_state;
   const gimple *m_stmt;
   const program_point *m_point;
-  state_change_visitor *m_visitor;
   checker_path *m_emission_path;
  const extrinsic_state &m_ext_state;
 };
-- 
2.26.2



[committed] analyzer: add some missing FINAL OVERRIDEs

2020-09-28 Thread David Malcolm via Gcc-patches
Spotted by cppcheck.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as c0ed6afef7897f32dc199da9a5430664fcbb61bb.

gcc/analyzer/ChangeLog:
* region-model.h (binop_svalue::dyn_cast_binop_svalue): Remove
redundant "virtual".  Add FINAL OVERRIDE.
(widening_svalue::dyn_cast_widening_svalue): Add FINAL OVERRIDE.
(compound_svalue::dyn_cast_compound_svalue): Likewise.
(conjured_svalue::dyn_cast_conjured_svalue): Likewise.
---
 gcc/analyzer/region-model.h | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index a61aff2c4b3..cfeac8d6951 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -806,7 +806,10 @@ public:
   }
 
   enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_BINOP; }
-  virtual const binop_svalue *dyn_cast_binop_svalue () const { return this; }
+  const binop_svalue *dyn_cast_binop_svalue () const FINAL OVERRIDE
+  {
+    return this;
+  }
 
   void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
   void accept (visitor *v) const FINAL OVERRIDE;
@@ -1067,7 +1070,10 @@ public:
   }
 
   enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_WIDENING; }
-  const widening_svalue *dyn_cast_widening_svalue () const { return this; }
+  const widening_svalue *dyn_cast_widening_svalue () const FINAL OVERRIDE
+  {
+    return this;
+  }
 
   void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
   void accept (visitor *v) const FINAL OVERRIDE;
@@ -1158,7 +1164,10 @@ public:
   compound_svalue (tree type, const binding_map );
 
   enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_COMPOUND; }
-  const compound_svalue *dyn_cast_compound_svalue () const { return this; }
+  const compound_svalue *dyn_cast_compound_svalue () const FINAL OVERRIDE
+  {
+    return this;
+  }
 
   void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
   void accept (visitor *v) const FINAL OVERRIDE;
@@ -1263,7 +1272,10 @@ public:
   }
 
   enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_CONJURED; }
-  const conjured_svalue *dyn_cast_conjured_svalue () const { return this; }
+  const conjured_svalue *dyn_cast_conjured_svalue () const FINAL OVERRIDE
+  {
+    return this;
+  }
 
   void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE;
   void accept (visitor *v) const FINAL OVERRIDE;
-- 
2.26.2



[committed] analyzer: fix ICE on non-pointer longjmp [PR97233]

2020-09-28 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-3512-g01eabbeadb645959d5dcb0f00f41c3565a8f54f1.

gcc/analyzer/ChangeLog:
PR analyzer/97233
* analyzer.cc (is_longjmp_call_p): Require the initial argument
to be a pointer.
* engine.cc (exploded_node::on_longjmp): Likewise.

gcc/testsuite/ChangeLog:
PR analyzer/97233
* gcc.dg/analyzer/pr97233.c: New test.
---
 gcc/analyzer/analyzer.cc| 5 -
 gcc/analyzer/engine.cc  | 1 +
 gcc/testsuite/gcc.dg/analyzer/pr97233.c | 8 
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr97233.c

diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc
index 82d487858dc..c792dc38f27 100644
--- a/gcc/analyzer/analyzer.cc
+++ b/gcc/analyzer/analyzer.cc
@@ -218,7 +218,10 @@ is_longjmp_call_p (const gcall *call)
 {
   if (is_special_named_call_p (call, "longjmp", 2)
   || is_special_named_call_p (call, "siglongjmp", 2))
-    return true;
+    /* exploded_node::on_longjmp requires a pointer for the initial
+       argument.  */
+    if (POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (call, 0))))
+      return true;
 
   return false;
 }
diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index aa43e4cb808..84eaa8415da 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -1277,6 +1277,7 @@ exploded_node::on_longjmp (exploded_graph ,
   region_model_context *ctxt) const
 {
   tree buf_ptr = gimple_call_arg (longjmp_call, 0);
+  gcc_assert (POINTER_TYPE_P (TREE_TYPE (buf_ptr)));
 
   region_model *new_region_model = new_state->m_region_model;
   const svalue *buf_ptr_sval = new_region_model->get_rvalue (buf_ptr, ctxt);
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr97233.c b/gcc/testsuite/gcc.dg/analyzer/pr97233.c
new file mode 100644
index 000..86930aa5872
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr97233.c
@@ -0,0 +1,8 @@
+void
+longjmp (__SIZE_TYPE__, int);
+
+void
+e7 (__SIZE_TYPE__ gr)
+{
+  longjmp (gr, 1);
+}
-- 
2.26.2



[committed] analyzer: fix sm_state_map::print

2020-09-28 Thread David Malcolm via Gcc-patches
In 10fc42a8396072912e9d9d940fba25950b3fdfc5 I converted state_t from
unsigned to const state *, but missed this comparison against 0.
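
A minimal sketch (simplified, hypothetical types) of why the '!= 0' check
stopped meaning "not the start state" once state_t became a pointer:

    struct state { int id; };
    typedef const state *state_t;      /* was: typedef unsigned state_t; */

    static const state start = { 0 };
    static state_t get_start_state () { return &start; }

    static bool nontrivial_p (state_t s)
    {
      /* With the old unsigned state_t, 's != 0' meant "not the start
         state" because the start state had id 0; with a pointer it only
         tests non-null, hence the fix compares against the start state
         object itself.  */
      return s != get_start_state ();
    }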

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as 53d28fd4e16f163a9bd0c323f432914804f1348a.

gcc/analyzer/ChangeLog:
* program-state.cc (sm_state_map::print): Update check
for m_global_state being the start state.
---
 gcc/analyzer/program-state.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc
index 78b87d509e4..5bb8907e340 100644
--- a/gcc/analyzer/program-state.cc
+++ b/gcc/analyzer/program-state.cc
@@ -160,7 +160,7 @@ sm_state_map::print (const region_model *model,
   bool first = true;
   if (!multiline)
 pp_string (pp, "{");
-  if (m_global_state != 0)
+  if (m_global_state != m_sm.get_start_state ())
 {
   if (multiline)
pp_string (pp, "  ");
-- 
2.26.2



Re: [Patch, Fortran] libgomp: Silence unpack's may be used uninitialized warning

2020-09-28 Thread Damian Rouson
I've been seeing similar messages to this one for quite some time now -- I
think ~1 year.  I don't often use unpack so there are likely many other
causes too.  I haven't taken the time to isolate them, but if I can do so
easily, I'll submit reports.

Damian

On Mon, Sep 28, 2020 at 2:12 PM Tobias Burnus 
wrote:

> There are more warnings, but I picked this one which shows up
> with default build options for GCC trunk – twice for each
> generated file:
>
> ../../../../repos/gcc/libgfortran/generated/unpack_i2.c:128:12: warning:
> ‘rstride’ may be used uninitialized [-Wmaybe-uninitialized]
> ../../../../repos/gcc/libgfortran/generated/unpack_i2.c:278:12: warning:
> ‘rstride’ may be used uninitialized [-Wmaybe-uninitialized]
>
> Hence, 13 times these messages (×2 warnings/file + ×2 with multilib) → 53
> 'warning:'.
>
> Seemingly, the compiler logic has changed, as the previous location
> used to be sufficient to silence the compiler.
> (The warning is right if base_addr = NULL and dim = 0; the latter should
> not occur but the compiler does not know this.)
>
> Committed as obvious as r11-3508-g69c56ce673d1e1d4508e82053a32011f807c6088
>
> Tobias
>
> -
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München /
> Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung,
> Alexander Walter
>


Re: [PATCH] New patch for the port of gccgo to GNU/Hurd

2020-09-28 Thread Ian Lance Taylor via Gcc-patches
On Fri, Sep 25, 2020 at 8:04 AM Svante Signell  wrote:
>
> Latest Debian snapshot of gcc (20200917-1) FTBFS due to a missing hurd
> entry in the // +build line of libgo/go/net/fd_posix.go. Attached is a
> patch for that missing entry.

Thanks.  Committed to mainline.

Ian


Re: [PATCH, rs6000] Add non-relative jump table support on Power Linux

2020-09-28 Thread Segher Boessenkool
Hi Hao Chen,

On Wed, Sep 09, 2020 at 04:55:29PM +0800, HAO CHEN GUI wrote:
>     Thanks for your advice. I removed macros defined in linux64.h and 
> linux.h. So they take relative jump tables by default. When 
> no-relative-jumptables is set, the absolute jump tables are taken. All 
> things relevant to section relocations are put in another patch. Thanks 
> again.

[ Please do not insert patches into discussions ]

> +/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.
> +   Return true if rs6000_relative_jumptables is set.  */

Don't just say what the code does (we can see that ;-) ); say *why*.
Of course it is terribly simple in this case, so maybe just that first
line is plenty.

> +/* Specify the machine mode that this machine uses
> +   for the index in the tablejump instruction.  */
> +#define CASE_VECTOR_MODE \
> +  (TARGET_32BIT || rs6000_relative_jumptables ? SImode : Pmode)

If TARGET_32BIT is true, SImode and Pmode are the same, so this is
simpler said as

#define CASE_VECTOR_MODE (rs6000_relative_jumptables ? SImode : Pmode)


I'll have the tablejump* patterns converted to parameterized names
hopefully tonight or tomorrow, which will make your patch much easier
to read.  It looks like it will be fine, thanks :-)


Segher


[PATCH] c++: Diagnose visitors with different return types for std::visit [PR95904]

2020-09-28 Thread Ville Voutilainen via Gcc-patches
Not completely tested yet. This does fix the problem of converting
incompatible pointer-to-function types, and thus gets rid of the suggestion
that compiling the code with -fpermissive is a possibility. There
is a special-casing for visit() for visitation of a single variant, and there
we don't even instantiate the whole table mechanism. We should really
entertain the possibility of flattening the whole visitation table; then
this check could (at least in theory) be uniformly written as just
an iteration of all table elements, which is not so convenient to do
with the nested multitable. This seems like a worthy incremental
improvement to me.
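
As a hedged illustration (not taken from the patch's testsuite), this is
the kind of visitor the new check turns into a readable static_assert
failure; the snippet is expected *not* to compile:

    #include <variant>

    int main()
    {
      std::variant<int, double> v{1};
      // The visitor returns int for the int alternative and double for
      // the double alternative, so its return type is not uniform:
      std::visit([](auto x) -> decltype(x) { return x; }, v);
    }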

2020-09-29  Ville Voutilainen  

PR libstdc++/95904
* include/std/variant (__same_types): New.
(__check_visitor_result): Likewise.
(__check_visitor_results): Likewise.
(visit(_Visitor&&, _Variants&&...)): Use __check_visitor_results
in case we're visiting just one variant.
(__gen_vtable_impl::_S_apply):
Check the visitor return type.
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index dd8847cf829..56de78407c4 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -182,7 +182,7 @@ namespace __variant
   // used for raw visitation with indices passed in
   struct __variant_idx_cookie { using type = __variant_idx_cookie; };
   // Used to enable deduction (and same-type checking) for std::visit:
-  template<typename _Tp> struct __deduce_visit_result { };
+  template<typename _Tp> struct __deduce_visit_result { using type = _Tp; };
 
   // Visit variants that might be valueless.
   template
@@ -1017,7 +1017,22 @@ namespace __variant
 
   static constexpr auto
   _S_apply()
-  { return _Array_type{&__visit_invoke}; }
+  {
+	constexpr bool __visit_ret_type_mismatch =
+	  _Array_type::__result_is_deduced::value
+	  && !is_same_v<typename _Result_type::type,
+		decltype(__visit_invoke(std::declval<_Visitor>(),
+		  std::declval<_Variants>()...))>;
+	if constexpr (__visit_ret_type_mismatch)
+	  {
+	static_assert(!__visit_ret_type_mismatch,
+			  "std::visit requires the visitor to have the same "
+			  "return type for all alternatives of a variant");
+	return __nonesuch{};
+	  }
+	else
+	  return _Array_type{&__visit_invoke};
+  }
 };
 
   template
@@ -1692,6 +1707,27 @@ namespace __variant
 			   std::forward<_Variants>(__variants)...);
 }
 
+  template <typename _Tp, typename... _Types>
+    struct __same_types : public std::bool_constant<
+      std::__and_<std::is_same<_Tp, _Types>...>::value> {};
+
+  template <size_t _Idx, typename _Visitor, typename _Variant>
+    decltype(auto) __check_visitor_result(_Visitor&& __vis,
+					  _Variant&& __variant)
+    {
+      return std::forward<_Visitor>(__vis)(
+	std::get<_Idx>(std::forward<_Variant>(__variant)));
+    }
+
+  template <typename _Visitor, typename _Variant, size_t... _Idxs>
+    constexpr bool __check_visitor_results(std::index_sequence<_Idxs...>)
+    {
+      return __same_types<decltype(__check_visitor_result<_Idxs>(
+	std::declval<_Visitor>(),
+	std::declval<_Variant>()))...>::value;
+    }
+
+
   template
 constexpr decltype(auto)
 visit(_Visitor&& __visitor, _Variants&&... __variants)
@@ -1704,8 +1740,28 @@ namespace __variant
 
   using _Tag = __detail::__variant::__deduce_visit_result<_Result_type>;
 
-  return std::__do_visit<_Tag>(std::forward<_Visitor>(__visitor),
-   std::forward<_Variants>(__variants)...);
+  if constexpr (sizeof...(_Variants) == 1)
+	{
+	  constexpr bool __visit_rettypes_match =
+	    __check_visitor_results<_Visitor, _Variants...>(
+	      std::make_index_sequence<
+		std::variant_size<std::remove_reference_t<_Variants>...>::value>());
+	  if constexpr (!__visit_rettypes_match)
+	{
+	  static_assert(__visit_rettypes_match,
+			  "std::visit requires the visitor to have the same "
+			  "return type for all alternatives of a variant");
+	  return;
+	}
+	  else
+	return std::__do_visit<_Tag>(
+	  std::forward<_Visitor>(__visitor),
+	  std::forward<_Variants>(__variants)...);
+	}
+  else
+	return std::__do_visit<_Tag>(
+	  std::forward<_Visitor>(__visitor),
+	  std::forward<_Variants>(__variants)...);
 }
 
 #if __cplusplus > 201703L


Re: [PING][PATCH] correct handling of indices into arrays with elements larger than 1 (PR c++/96511)

2020-09-28 Thread Martin Sebor via Gcc-patches

On 9/25/20 11:17 PM, Jason Merrill wrote:

On 9/22/20 4:05 PM, Martin Sebor wrote:

The rebased and retested patches are attached.

On 9/21/20 3:17 PM, Martin Sebor wrote:
Ping: 
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553906.html


(I'm working on rebasing the patch on top of the latest trunk which
has changed some of the same code but it'd be helpful to get a go-
ahead on substance the changes.  I don't expect the rebase to
require any substantive modifications.)

Martin

On 9/14/20 4:01 PM, Martin Sebor wrote:

On 9/4/20 11:14 AM, Jason Merrill wrote:

On 9/3/20 2:44 PM, Martin Sebor wrote:

On 9/1/20 1:22 PM, Jason Merrill wrote:

On 8/11/20 12:19 PM, Martin Sebor via Gcc-patches wrote:

-Wplacement-new handles array indices and pointer offsets the same:
by adjusting them by the size of the element.  That's correct for
the latter but wrong for the former, causing false positives when
the element size is greater than one.

In addition, the warning doesn't even attempt to handle arrays of
arrays.  I'm not sure if I forgot or if I simply didn't think of
it.

The attached patch corrects these oversights by replacing most
of the -Wplacement-new code with a call to compute_objsize which
handles all this correctly (plus more), and is also better tested.
But even compute_objsize has bugs: it trips up while converting
wide_int to offset_int for some pointer offset ranges.  Since
handling the C++ IL required changes in this area the patch also
fixes that.

For review purposes, the patch affects just the middle end.
The C++ diff pretty much just removes code from the front end.


The C++ changes are OK.


Thank you for looking at the rest as well.




-compute_objsize (tree ptr, int ostype, access_ref *pref,
-		 bitmap *visited, const vr_values *rvals /* = NULL */)
+compute_objsize (tree ptr, int ostype, access_ref *pref, bitmap *visited,
+		 const vr_values *rvals)


This reformatting seems unnecessary, and I prefer to keep the 
comment about the default argument.


This overload doesn't take a default argument.  (There was a stray
declaration of a similar function at the top of the file that had
one.  I've removed it.)


Ah, true.


-  if (!size || TREE_CODE (size) != INTEGER_CST)
-   return false;

 >...

You change some failure cases in compute_objsize to return 
success with a maximum range, while others continue to return 
failure. This needs commentary about the design rationale.


This is too much for a comment in the code but the background is this:
compute_objsize initially returned the object size as a constant.
Recently, I have enhanced it to return a range to improve warnings for
allocated objects.  With that, a failure can be turned into success by
having the function set the range to that of the largest object.  That
should simplify the function's callers and could even improve the
detection of some invalid accesses.  Once this change is made it might
even be possible to change its return type to void.
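
A hedged sketch of the "failure becomes success with the maximum range"
convention described above; the stand-in types are illustrative, not the
actual access_ref/offset_int interfaces:

    #include <cstdint>

    struct access_ref { int64_t sizrng[2]; };   // simplified stand-in

    // On an unknown size, succeed with the widest possible range instead
    // of failing, so callers need not special-case failure.
    bool compute_size (bool size_known, int64_t size, access_ref *pref)
    {
      if (!size_known)
        {
          pref->sizrng[0] = 0;          // smallest possible object
          pref->sizrng[1] = INT64_MAX;  // stand-in for the largest object size
          return true;
        }
      pref->sizrng[0] = pref->sizrng[1] = size;
      return true;
    }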

The change that caught your eye is necessary to make the function
a drop-in replacement for the C++ front end code which makes this
same assumption.  Without it, a number of test cases that exercise
VLAs fail in g++.dg/warn/Wplacement-new-size-5.C.  For example:

   void f (int n)
   {
     char a[n];
     new (a - 1) int ();
   }

Changing any of the other places isn't necessary for existing tests
to pass (and I didn't want to introduce too much churn).  But I do
want to change the rest of the function along the same lines at some
point.


Please do change the other places to be consistent; better to have 
more churn than to leave the function half-updated.  That can be a 
separate patch if you prefer, but let's do it now rather than later.


I've made most of these changes in the other patch (also attached).
I'm quite happy with the result but it turned out to be a lot more
work than either of us expected, mostly due to the amount of testing.

I've left a couple of failing cases in place mainly as reminders
to handle them better (which means I also didn't change the caller
to avoid testing for failures).  I've also added TODO notes with
reminders to handle some of the new codes more completely.




+  special_array_member sam{ };


sam is always set by component_ref_size, so I don't think it's 
necessary to initialize it at the declaration.


I find initializing pass-by-pointer local variables helpful but
I don't insist on it.




@@ -187,7 +187,7 @@ decl_init_size (tree decl, bool min)
   tree last_type = TREE_TYPE (last);
   if (TREE_CODE (last_type) != ARRAY_TYPE
   || TYPE_SIZE (last_type))
-    return size;
+    return size ? size : TYPE_SIZE_UNIT (type);


This change seems to violate the comment for the function.


By my reading (and writing) the change is covered by the first
sentence:

    Returns the size of the object designated by DECL considering
    its initializer if it either has one or if it would not affect
    its 

Re: [PATCH] libstdc++: Add C++2a synchronization support

2020-09-28 Thread Thomas Rodgers via Gcc-patches


Jonathan Wakely writes:

> On 11/09/20 16:58 -0700, Thomas Rodgers wrote:
>>From: Thomas Rodgers 
>>
>>This patch supersedes both the Add C++2a synchronization support patch
>>being replied to *and* the patch adding wait/notify_* to atomic_flag.
>>
>>Add support for -
>>  * atomic_flag::wait/notify_one/notify_all
>>  * atomic::wait/notify_one/notify_all
>>  * counting_semaphore
>>  * binary_semaphore
>>  * latch
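
A minimal usage sketch of the C++20 wait/notify API listed above
(standard interfaces, not code from the patch):

    #include <atomic>
    #include <thread>

    int main()
    {
      std::atomic<int> ready{0};
      std::thread waiter([&] {
        ready.wait(0);           // blocks until 'ready' changes from 0
      });
      ready.store(1);
      ready.notify_one();        // wakes the waiter
      waiter.join();
    }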
>>
>>libstdc++-v3/ChangeLog:
>>
>>  * include/Makefile.am (bits_headers): Add new header.
>>  * include/Makefile.in: Regenerate.
>>  * include/bits/atomic_base.h (__atomic_flag::wait): Define.
>>  (__atomic_flag::notify_one): Likewise.
>>  (__atomic_flag::notify_all): Likewise.
>>  (__atomic_base<_Itp>::wait): Likewise.
>>  (__atomic_base<_Itp>::notify_one): Likewise.
>>  (__atomic_base<_Itp>::notify_all): Likewise.
>>  (__atomic_base<_Ptp*>::wait): Likewise.
>>  (__atomic_base<_Ptp*>::notify_one): Likewise.
>>  (__atomic_base<_Ptp*>::notify_all): Likewise.
>>  (__atomic_impl::wait): Likewise.
>>  (__atomic_impl::notify_one): Likewise.
>>  (__atomic_impl::notify_all): Likewise.
>>  (__atomic_float<_Fp>::wait): Likewise.
>>  (__atomic_float<_Fp>::notify_one): Likewise.
>>  (__atomic_float<_Fp>::notify_all): Likewise.
>>  (__atomic_ref<_Tp>::wait): Likewise.
>>  (__atomic_ref<_Tp>::notify_one): Likewise.
>>  (__atomic_ref<_Tp>::notify_all): Likewise.
>>  (atomic_wait<_Tp>): Likewise.
>>  (atomic_wait_explicit<_Tp>): Likewise.
>>  (atomic_notify_one<_Tp>): Likewise.
>>  (atomic_notify_all<_Tp>): Likewise.
>>  * include/bits/atomic_wait.h: New file.
>>  * include/bits/atomic_timed_wait.h: New file.
>>  * include/bits/semaphore_base.h: New file.
>>  * include/std/atomic (atomic::wait): Define.
>>  (atomic::wait_one): Likewise.
>>  (atomic::wait_all): Likewise.
>>  (atomic<_Tp>::wait): Likewise.
>>  (atomic<_Tp>::wait_one): Likewise.
>>  (atomic<_Tp>::wait_all): Likewise.
>>  (atomic<_Tp*>::wait): Likewise.
>>  (atomic<_Tp*>::wait_one): Likewise.
>>  (atomic<_Tp*>::wait_all): Likewise.
>>  * include/std/latch: New file.
>>  * include/std/semaphore: New file.
>>  * include/std/version: Add __cpp_lib_semaphore and
>>  __cpp_lib_latch defines.
>>  * testsuite/29_atomic/atomic/wait_notify/atomic_refs.cc: New test.
>>  * testsuite/29_atomic/atomic/wait_notify/bool.cc: Likewise.
>>  * testsuite/29_atomic/atomic/wait_notify/integrals.cc: Likewise.
>>  * testsuite/29_atomic/atomic/wait_notify/floats.cc: Likewise.
>>  * testsuite/29_atomic/atomic/wait_notify/pointers.cc: Likewise.
>>  * testsuite/29_atomic/atomic/wait_notify/generic.cc: Likewise.
>>  * testsuite/29_atomic/atomic/wait_notify/generic.h: New File.
>>  * testsuite/29_atomics/atomic_flag/wait_notify/1.cc: New test.
>>  * testsuite/30_thread/semaphore/1.cc: New test.
>>  * testsuite/30_thread/semaphore/2.cc: Likewise.
>>  * testsuite/30_thread/semaphore/least_max_value_neg.cc: Likewise.
>>  * testsuite/30_thread/semaphore/try_acquire.cc: Likewise.
>>  * testsuite/30_thread/semaphore/try_acquire_for.cc: Likewise.
>>  * testsuite/30_thread/semaphore/try_acquire_posix.cc: Likewise.
>>  * testsuite/30_thread/semaphore/try_acquire_until.cc: Likewise.
>>  * testsuite/30_thread/latch/1.cc: New test.
>>  * testsuite/30_thread/latch/2.cc: New test.
>>  * testsuite/30_thread/latch/3.cc: New test.
>>---
>> libstdc++-v3/include/Makefile.am  |   5 +
>> libstdc++-v3/include/Makefile.in  |   5 +
>> libstdc++-v3/include/bits/atomic_base.h   | 195 +++-
>> libstdc++-v3/include/bits/atomic_timed_wait.h | 281 
>> libstdc++-v3/include/bits/atomic_wait.h   | 301 ++
>> libstdc++-v3/include/bits/semaphore_base.h| 283 
>> libstdc++-v3/include/std/atomic   |  73 +
>> libstdc++-v3/include/std/latch|  90 ++
>> libstdc++-v3/include/std/semaphore|  92 ++
>> libstdc++-v3/include/std/version  |   2 +
>> .../atomic/wait_notify/atomic_refs.cc | 103 ++
>> .../29_atomics/atomic/wait_notify/bool.cc |  59 
>> .../29_atomics/atomic/wait_notify/floats.cc   |  32 ++
>> .../29_atomics/atomic/wait_notify/generic.cc  |  31 ++
>> .../29_atomics/atomic/wait_notify/generic.h   | 160 ++
>> .../atomic/wait_notify/integrals.cc   |  65 
>> .../29_atomics/atomic/wait_notify/pointers.cc |  59 
>> .../29_atomics/atomic_flag/wait_notify/1.cc   |  61 
>> libstdc++-v3/testsuite/30_threads/latch/1.cc  |  27 ++
>> libstdc++-v3/testsuite/30_threads/latch/2.cc  |  27 ++
>> libstdc++-v3/testsuite/30_threads/latch/3.cc  |  50 +++
>> .../testsuite/30_threads/semaphore/1.cc   |  27 ++
>> .../testsuite/30_threads/semaphore/2.cc   |  27 ++
>> .../semaphore/least_max_value_neg.cc  

[Patch, Fortran] libgomp: Silence unpack's may be used uninitialized warning

2020-09-28 Thread Tobias Burnus

There are more warnings, but I picked this one which shows up
with default build options for GCC trunk – twice for each
generated file:

../../../../repos/gcc/libgfortran/generated/unpack_i2.c:128:12: warning: 
‘rstride’ may be used uninitialized [-Wmaybe-uninitialized]
../../../../repos/gcc/libgfortran/generated/unpack_i2.c:278:12: warning: 
‘rstride’ may be used uninitialized [-Wmaybe-uninitialized]

Hence, 13 times these messages (×2 warnings/file + ×2 with multilib) → 53 
'warning:'.

Seemingly, the compiler logic has changed, as the previous location
used to be sufficient to silence the compiler.
(The warning is right if base_addr = NULL and dim = 0; the latter should
not occur but the compiler does not know this.)
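
A reduced sketch (hypothetical names) of the pattern the warning fires
on — the compiler cannot prove the initializing loop runs when dim could
be 0:

    extern int get_rank (void);

    int f (int have_base_addr)
    {
      int rstride[7];
      if (have_base_addr)
        rstride[0] = 2;            /* initialized on this path */
      else
        {
          int dim = get_rank ();   /* compiler cannot prove dim > 0 */
          for (int n = 0; n < dim; n++)
            rstride[n] = 1;        /* old init point: skipped if dim == 0 */
        }
      return rstride[0];           /* -Wmaybe-uninitialized triggers here */
    }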

Committed as obvious as r11-3508-g69c56ce673d1e1d4508e82053a32011f807c6088

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
libgfortran/m4/unpack.m4: Silence -Wmaybe-uninitialized

libgfortran/ChangeLog:

	* m4/unpack.m4 (unpack0_'rtype_code`,
	unpack1_'rtype_code`): Move 'rstride[0]' initialization outside
	conditional branch to silence -Wmaybe-uninitialized.
	* generated/unpack_c10.c: Regenerate.
	* generated/unpack_c16.c: Regenerate.
	* generated/unpack_c4.c: Regenerate.
	* generated/unpack_c8.c: Regenerate.
	* generated/unpack_i1.c: Regenerate.
	* generated/unpack_i16.c: Regenerate.
	* generated/unpack_i2.c: Regenerate.
	* generated/unpack_i4.c: Regenerate.
	* generated/unpack_i8.c: Regenerate.
	* generated/unpack_r10.c: Regenerate.
	* generated/unpack_r16.c: Regenerate.
	* generated/unpack_r4.c: Regenerate.
	* generated/unpack_r8.c: Regenerate.

 libgfortran/generated/unpack_c10.c | 8 
 libgfortran/generated/unpack_c16.c | 8 
 libgfortran/generated/unpack_c4.c  | 8 
 libgfortran/generated/unpack_c8.c  | 8 
 libgfortran/generated/unpack_i1.c  | 8 
 libgfortran/generated/unpack_i16.c | 8 
 libgfortran/generated/unpack_i2.c  | 8 
 libgfortran/generated/unpack_i4.c  | 8 
 libgfortran/generated/unpack_i8.c  | 8 
 libgfortran/generated/unpack_r10.c | 8 
 libgfortran/generated/unpack_r16.c | 8 
 libgfortran/generated/unpack_r4.c  | 8 
 libgfortran/generated/unpack_r8.c  | 8 
 libgfortran/m4/unpack.m4   | 8 
 14 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/libgfortran/m4/unpack.m4 b/libgfortran/m4/unpack.m4
index da7a5ce0443..ebc469a84f8 100644
--- a/libgfortran/m4/unpack.m4
+++ b/libgfortran/m4/unpack.m4
@@ -80,6 +80,8 @@ unpack0_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
   else
 runtime_error ("Funny sized logical array");
 
+  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
+  rstride[0] = 1;
   if (ret->base_addr == NULL)
 {
   /* The front end has signalled that we need to populate the
@@ -103,8 +105,6 @@ unpack0_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
   else
 {
   dim = GFC_DESCRIPTOR_RANK (ret);
-  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
-  rstride[0] = 1;
   for (n = 0; n < dim; n++)
 	{
 	  count[n] = 0;
@@ -226,6 +226,8 @@ unpack1_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
   else
 runtime_error ("Funny sized logical array");
 
+  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
+  rstride[0] = 1;
   if (ret->base_addr == NULL)
 {
   /* The front end has signalled that we need to populate the
@@ -250,8 +252,6 @@ unpack1_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
   else
 {
   dim = GFC_DESCRIPTOR_RANK (ret);
-  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
-  rstride[0] = 1;
   for (n = 0; n < dim; n++)
 	{
 	  count[n] = 0;
diff --git a/libgfortran/generated/unpack_c10.c b/libgfortran/generated/unpack_c10.c
index e3b3c29e9da..12110df8af0 100644
--- a/libgfortran/generated/unpack_c10.c
+++ b/libgfortran/generated/unpack_c10.c
@@ -79,6 +79,8 @@ unpack0_c10 (gfc_array_c10 *ret, const gfc_array_c10 *vector,
   else
 runtime_error ("Funny sized logical array");
 
+  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
+  rstride[0] = 1;
   if (ret->base_addr == NULL)
 {
   /* The front end has signalled that we need to populate the
@@ -102,8 +104,6 @@ unpack0_c10 (gfc_array_c10 *ret, const gfc_array_c10 *vector,
   else
 {
   dim = GFC_DESCRIPTOR_RANK (ret);
-  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
-  rstride[0] = 1;
   for (n = 0; n < dim; n++)
 	{
 	  count[n] = 0;
@@ -225,6 +225,8 @@ unpack1_c10 (gfc_array_c10 *ret, const gfc_array_c10 *vector,
   else
 runtime_error ("Funny sized logical array");
 
+  /* Initialize to avoid -Wmaybe-uninitialized complaints.  */
+  rstride[0] = 1;
   if (ret->base_addr == NULL)
 {
   /* The front end has signalled that we need to 

libbacktrace patch committed: Create mtest.dSYM

2020-09-28 Thread Ian Lance Taylor via Gcc-patches
This libbacktrace patch creates mtest.dSYM when using dsymutil.  This
is for PR 97082, but it probably doesn't fix the PR.  Bootstrapped and
ran libbacktrace tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

PR libbacktrace/97082
* Makefile.am (check_DATA): Add mtest.dSYM if USE_DSYMUTIL.
* Makefile.in: Regenerate.
diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index f7e8ca2cf5c..5899a2157f4 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -484,6 +484,10 @@ mtest_LDADD = libbacktrace.la
 
 BUILDTESTS += mtest
 
+if USE_DSYMUTIL
+check_DATA += mtest.dSYM
+endif USE_DSYMUTIL
+
 if HAVE_MINIDEBUG
 
 TESTS += mtest_minidebug


Re: [PATCH] c++: Incomplete parameter mappings during normalization

2020-09-28 Thread Patrick Palka via Gcc-patches
On Mon, 28 Sep 2020, Jason Merrill wrote:

> On 9/25/20 4:44 PM, Patrick Palka wrote:
> > In the testcase concepts7.C below, we currently reject the call to f1
> > but we accept the call to f2, even though their associated constraints
> > are functionally equivalent.
> 
> Rather, because they are not functionally equivalent.
> 
> > The reason satisfaction differs for (!!C<...>) and C<...> is due to normalization: the former is already an
> > atom, and the latter is not.  Normalization of the former yields itself,
> > whereas normalization of the latter yields the atom 'true' with an empty
> > parameter mapping (since the atom uses no template parameters).
> 
> Yes.
> 
> > So when
> > building the latter atom we threw away the T::type term that would later
> > result in substitution failure during satisfaction.
> > 
> > However, [temp.constr.normal]/1 says:
> > 
> >- ...
> >- The normal form of a concept-id C<A1, A2, ..., An> is the normal
> >  form of the constraint-expression of C, after substituting A1, A2,
> >  ..., An for C's respective template parameters in the parameter
> >  mappings in each atomic constraint.
> >- The normal form of any other expression E is the atomic constraint
> >  whose expression is E and whose parameter mapping is the identity
> >  mapping.
> > 
> > I believe these two bullet points imply that the atom 'true' in the
> > normal form of C should have the mapping R |-> T::type
> > instead of the empty mapping that we give it, because according to the
> > last bullet point, each atom should start out with the identity mapping
> > that includes all template parameters.
> 
> But [temp.constr.atomic] says,
> 
> An atomic constraint is formed from an expression E and a mapping from the
> template parameters that appear within E...

I see..  Well, that certainly settles that :)  I definitely read that
passage more than once, but I must have glossed over this key wording
each time.  That, and my misinterpreting of "the identity mapping" as
"the identity mapping over all template parameters" ultimately led me
down the wrong path.

> 
> So the identity mapping only includes template parameters that appear within
> the expression, which in this case is none.
> 
> Also, the discussion of the example in [temp.constr.normal] says,
> 
> "Normalization of B’s constraint-expression is valid and results in T::value
> (with the mapping T → U*) ∨ true (with an empty mapping)."
> 
> So I think the testcase is already handled as specified.

Got it.  Thanks for your clarification, and sorry for the noise.  I'll
make sure to read the standard ever more carefully next time.

> 
> > This patch fixes this issue by always giving the first atom in the
> > normal form of each concept a 'complete' parameter mapping, i.e. one
> > that includes all template parameters.  I think it suffices to do this
> > only for the first atom so that we catch substitution failures like in
> > concepts7.C at the right time.  For the other atoms, their mappings can
> > continue to include only template parameters used in the atom.
> > 
> > I noticed that PR92268 alludes to this issue, so this patch refers to
> > that PR and adds the PR's first testcase which we now accept.
> > 
> > Is the above interpretation of the standard correct here?  If so, does
> > this seem like a good approach?
> > 
> > gcc/cp/ChangeLog:
> > 
> > PR c++/92268
> > * constraint.cc (build_parameter_mapping): Add a bool parameter
> > 'complete'.  When 'complete' is true, then include all in-scope
> > template parameters in the mapping.
> > (norm_info::update_context): Pass false as the 'complete'
> > argument to build_parameter_mapping.
> > (norm_info::normalized_first_atom_p): New bool data member.
> > (normalize_logical_operation): Set info.normalized_first_atom_p
> > after normalizing the left operand.
> > (normalize_concept_check): Reset info.normalized_first_atom_p
> > before normalizing this concept.
> > (normalize_atom): Always give the first atom of a concept
> > definition a complete parameter mapping.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > PR c++/92268
> > * g++.dg/cpp2a/concepts-pr92268.C: New test.
> > * g++.dg/cpp2a/concepts-return-req1.C: Don't expect an error,
> > as the call is no longer ambiguous.
> > * g++.dg/cpp2a/concepts7.C: New test.
> > * g++.dg/cpp2a/concepts8.C: New test.
> > ---
> >   gcc/cp/constraint.cc  | 44 ---
> >   gcc/testsuite/g++.dg/cpp2a/concepts-pr92268.C | 42 ++
> >   .../g++.dg/cpp2a/concepts-return-req1.C   |  2 +-
> >   gcc/testsuite/g++.dg/cpp2a/concepts7.C|  9 
> >   gcc/testsuite/g++.dg/cpp2a/concepts8.C| 25 +++
> >   5 files changed, 116 insertions(+), 6 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr92268.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts7.C
> >   create mode 100644 

libbacktrace patch committed: Only use dsymutil with Mach-O

2020-09-28 Thread Ian Lance Taylor via Gcc-patches
This patch changes the libbacktrace tests to only run dsymutil when
building for Mach-O.  This should fix GCC PR 97227.  Bootstrapped and
ran libbacktrace tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

PR libbacktrace/97227
* configure.ac (USE_DSYMUTIL): Define instead of HAVE_DSYMUTIL.
* Makefile.am: Change all uses of HAVE_DSYMUTIL to USE_DSYMUTIL.
* configure: Regenerate.
* Makefile.in: Regenerate.
diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index 4d349386c9b..f7e8ca2cf5c 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -100,12 +100,12 @@ check_DATA =
 # Flags to use when compiling test programs.
 libbacktrace_TEST_CFLAGS = $(EXTRA_FLAGS) $(WARN_FLAGS) -g
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 
 %.dSYM: %
$(DSYMUTIL) $<
 
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 if NATIVE
 check_LTLIBRARIES = libbacktrace_alloc.la
@@ -237,9 +237,9 @@ allocfail.sh: allocfail
 
 TESTS += allocfail.sh
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += allocfail.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 if HAVE_ELF
 if HAVE_OBJCOPY_DEBUGLINK
@@ -273,9 +273,9 @@ btest_LDADD = libbacktrace.la
 
 BUILDTESTS += btest
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += btest.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 if HAVE_ELF
 
@@ -293,9 +293,9 @@ btest_alloc_LDADD = libbacktrace_alloc.la
 
 BUILDTESTS += btest_alloc
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += btest_alloc.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 if HAVE_DWZ
 
@@ -323,9 +323,9 @@ stest_LDADD = libbacktrace.la
 
 BUILDTESTS += stest
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += stest.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 stest_alloc_SOURCES = $(stest_SOURCES)
 stest_alloc_CFLAGS = $(libbacktrace_TEST_CFLAGS)
@@ -333,9 +333,9 @@ stest_alloc_LDADD = libbacktrace_alloc.la
 
 BUILDTESTS += stest_alloc
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += stest_alloc.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 if HAVE_ELF
 
@@ -366,17 +366,17 @@ edtest_LDADD = libbacktrace.la
 
 BUILDTESTS += edtest
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += edtest.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 edtest_alloc_SOURCES = $(edtest_SOURCES)
 edtest_alloc_CFLAGS = $(libbacktrace_TEST_CFLAGS)
 edtest_alloc_LDADD = libbacktrace_alloc.la
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += edtest_alloc.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 BUILDTESTS += edtest_alloc
 
@@ -394,9 +394,9 @@ ttest_SOURCES = ttest.c testlib.c
 ttest_CFLAGS = $(libbacktrace_TEST_CFLAGS) -pthread
 ttest_LDADD = libbacktrace.la
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += ttest.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 BUILDTESTS += ttest_alloc
 
@@ -404,9 +404,9 @@ ttest_alloc_SOURCES = $(ttest_SOURCES)
 ttest_alloc_CFLAGS = $(ttest_CFLAGS)
 ttest_alloc_LDADD = libbacktrace_alloc.la
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += ttest_alloc.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 endif HAVE_PTHREAD
 
@@ -462,9 +462,9 @@ dwarf5_LDADD = libbacktrace.la
 
 BUILDTESTS += dwarf5
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += dwarf5.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 dwarf5_alloc_SOURCES = $(dwarf5_SOURCES)
 dwarf5_alloc_CFLAGS = $(dwarf5_CFLAGS)
@@ -472,9 +472,9 @@ dwarf5_alloc_LDADD = libbacktrace_alloc.la
 
 BUILDTESTS += dwarf5_alloc
 
-if HAVE_DSYMUTIL
+if USE_DSYMUTIL
 check_DATA += dwarf5_alloc.dSYM
-endif HAVE_DSYMUTIL
+endif USE_DSYMUTIL
 
 endif
 
diff --git a/libbacktrace/configure.ac b/libbacktrace/configure.ac
index 0659ea60484..ec456bf4a1f 100644
--- a/libbacktrace/configure.ac
+++ b/libbacktrace/configure.ac
@@ -510,7 +510,7 @@ AM_CONDITIONAL(HAVE_OBJCOPY_DEBUGLINK, test 
"$libbacktrace_cv_objcopy_debuglink"
 
 AC_ARG_VAR(DSYMUTIL, [location of dsymutil])
 AC_CHECK_PROG(DSYMUTIL, dsymutil, dsymutil)
-AM_CONDITIONAL(HAVE_DSYMUTIL, test -n "${DSYMUTIL}")
+AM_CONDITIONAL(USE_DSYMUTIL, test -n "${DSYMUTIL}" -a "$FORMAT_FILE" = "macho.lo")
 
 AC_ARG_VAR(NM, [location of nm])
 AC_CHECK_PROG(NM, nm, nm)


Re: [PATCH] options: Save and restore opts_set for Optimization and Target options

2020-09-28 Thread Jakub Jelinek via Gcc-patches
On Mon, Sep 28, 2020 at 09:50:00PM +0200, Stefan Schulze Frielinghaus wrote:
> This patch breaks quite a few test cases (target-attribute/tattr-*) on
> IBM Z.  Having a look at function cl_target_option_restore reveals that
> some members of opts_set are reduced to 1 or 0 depending on whether a
> member was set before or not, e.g. for target_flags we have
> 
> opts_set->x_target_flags = (mask & 1) != 0;
> 
> whereas previously those members were not touched by
> cl_target_option_restore.

opts_set is always used as booleans; cleaner would be to have a
struct global_option_set that would contain just bool fields (or even
bitfields) with the same names as in struct global_option, but unfortunately
we didn't do that years ago and changing it now would be quite a lot of
work.
Anyway, opts_set records a boolean for each flag, whether it was set
explicitly or not, for non-pointer vars that means values 0 and 1,
for pointer vars NULL and "".
I vaguely remember the s390 opts_set handling differed quite a lot from all
the other targets.
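
A sketch of the opts/opts_set relationship described above (an
illustrative model, not GCC's generated structs; the flag name is only
an example):

    struct gcc_options     { int x_flag_unroll_loops; };
    struct gcc_options_set { bool x_flag_unroll_loops; };  /* explicit?  */

    void finish_options (gcc_options *opts, const gcc_options_set *opts_set)
    {
      if (!opts_set->x_flag_unroll_loops)  /* user gave no -f(no-)unroll-loops */
        opts->x_flag_unroll_loops = 1;     /* so another flag may imply it */
    }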

> What puzzles me right now is that in cl_target_option_save we save in
> ptr only options from opts but not from opts_set whereas in
> cl_target_option_restore we override some members of opts_set.  Thus it
> is unclear to me how a backend should restore opts_set then.

I've handled in the generic code handling of the opts_set only for whatever
is handled there for opts too.
Whether the vars that are handled in the save/restore options hooks need
handling of opts_set at all or not depends on whether the backend ever cares
about those or not (e.g. whether it ever queries
global_options_set.x_whatever).  If yes, it can be handled e.g. by having an
extra target variable that will store whether it was explicit or not and
restore from there.  If it is never used, then it doesn't need to be
handled.

Jakub



Re: [PATCH] arm: Add a couple of extra stack-protector tests

2020-09-28 Thread Christophe Lyon via Gcc-patches
On Wed, 23 Sep 2020 at 20:33, Richard Sandiford
 wrote:
>
> These tests were inspired by the corresponding aarch64 ones that I just
> committed.  They already pass.
>
> Tested on arm-linux-gnueabi, arm-linux-gnueabihf and armeb-eabi.
> OK for trunk?
>
> Richard
>
>
> gcc/testsuite/
> * gcc.target/arm/stack-protector-5.c: New test.
> * gcc.target/arm/stack-protector-6.c: Likewise.
> ---

Hi Richard,

These new tests fail when compiling for cortex-a15 and cortex-a57...
There are 2 "str" instructions generated, the code is much longer than
for cortex-a9 for instance.

They pass with cortex-a9, cortex-a5 and arm10tdmi.

Christophe

>  .../gcc.target/arm/stack-protector-5.c| 21 +++
>  .../gcc.target/arm/stack-protector-6.c|  8 +++
>  2 files changed, 29 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-5.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-6.c
>
> diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-5.c b/gcc/testsuite/gcc.target/arm/stack-protector-5.c
> new file mode 100644
> index 000..b808b11aa3d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/stack-protector-5.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fstack-protector-all -O2" } */
> +
> +void __attribute__ ((noipa))
> +f (void)
> +{
> +  volatile int x;
> +  asm volatile ("" :::
> +   "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
> +   "r8", "r9", "r10", "r11", "r12", "r14");
> +}
> +
> +/* The register clobbers above should not generate any single LDRs or STRs;
> +   all registers should be pushed and popped using register lists.  The only
> +   STRs should therefore be those associated with the stack protector tests
> +   themselves.
> +
> +   Make sure the address of the canary is not spilled and reloaded,
> +   since that would give the attacker an opportunity to change the
> +   canary value.  */
> +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-6.c b/gcc/testsuite/gcc.target/arm/stack-protector-6.c
> new file mode 100644
> index 000..f8eec878bd6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/stack-protector-6.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target fpic } */
> +/* { dg-options "-fstack-protector-all -O2 -fpic" } */
> +
> +#include "stack-protector-5.c"
> +
> +/* See the comment in stack-protector-5.c.  */
> +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */


Re: [PATCH] options: Save and restore opts_set for Optimization and Target options

2020-09-28 Thread Stefan Schulze Frielinghaus via Gcc-patches
On Sun, Sep 13, 2020 at 10:29:22AM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Fri, Sep 11, 2020 at 11:29:52AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > On Fri, Sep 11, 2020 at 09:46:37AM +0200, Christophe Lyon via Gcc-patches 
> > wrote:
> > > I'm seeing an ICE with this new test on most of my arm configurations,
> > > for instance:
> > > --target arm-none-linux-gnueabi --with-cpu cortex-a9
> > > /aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/gcc/xgcc
> > > -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-ar
> > > m-none-linux-gnueabi/gcc3/gcc/ c_lto_pr96939_0.o c_lto_pr96939_1.o
> > > -fdiagnostics-plain-output -flto -O2 -o
> > > gcc-target-arm-lto-pr96939-01.exe
> > 
> > Seems a latent issue.
> > Neither cl_optimization_{save,restore} nor cl_target_option_{save,restore}
> > (nor any of the target hooks they call) saves or restores any opts_set
> > values, so I think opts_set can be trusted only during option processing (if
> > at all), but not later.
> > So, short term a fix would be IMHO just stop using opts_set altogether in
> > arm_configure_build_target, it doesn't make much sense to me, it should test
> > if those strings are non-NULL instead, or at least do that when it is
> > invoked from arm_option_restore (e.g. could be done by calling it with
> > opts instead of &global_options_set).
> > Longer term, the question is if cl_optimization_{save,restore} and
> > cl_target_option_{save,restore} shouldn't be changed not to only
> > save/restore the options, but also save the opts_set flags.
> > It could be done e.g. by adding a bool array or set of bool members
> > to struct cl_optimization and struct cl_target_option , or even more compact
> > by using bitmasks, pack each 64 adjacent option flags into a UHWI element
> > of an array.
> 
> So, I've tried under debugger how it behaves and seems global_options_set
> is really an or of whether an option has been ever seen as explicit, either
> on the command line or in any of the option pragmas or optimize/target
> attributes seen so far, so it isn't something that can be relied on.
> 
> The following patch implements the saving/restoring of the opts_set bits
> (though only for the options/variables saved by the generic options-save.c
> code, for the target specific stuff that isn't handled by the generic code
> the opts_set argument is now passed to the hook and the backends can choose
> e.g. to use a TargetSave variable to save the flags either individually or
> together in some bitmask (or ignore it if they never need opts_set for the
> options). 
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux, armv7hl-linux-gnueabi,
> aarch64-linux, powerpc64le-linux and lto bootstrapped on x86_64-linux, ok
> for trunk?

This patch breaks quite a few test cases (target-attribute/tattr-*) on
IBM Z.  Having a look at function cl_target_option_restore reveals that
some members of opts_set are reduced to 1 or 0 depending on whether a
member was set before or not, e.g. for target_flags we have

opts_set->x_target_flags = (mask & 1) != 0;

whereas previously those members were not touched by
cl_target_option_restore.

My intuition of this whole option evaluation is still pretty vague.
Basically opts_set is a set of options enabled via command line and/or
via pragmas/attributes whereas opts is the set of options which are
implied by opts_set.

What puzzles me right now is that in cl_target_option_save we save in
ptr only options from opts but not from opts_set whereas in
cl_target_option_restore we override some members of opts_set.  Thus it
is unclear to me how a backend should restore opts_set then.
I'm probably missing something.  Any hints on how to restore opts_set
and especially target_flags?

Cheers,
Stefan


Re: [PATCH] RISC-V/libgcc: Use `-fasynchronous-unwind-tables' for LIB2_DIVMOD_FUNCS

2020-09-28 Thread Jim Wilson
On Sun, Aug 30, 2020 at 11:39 PM Kito Cheng  wrote
> Hi Maciej:
> LGTM, thanks for your patch!

I don't see this patch in the FSF GCC tree.  Maciej are you going to
commit it?  Or do you want us to commit it for you?

Jim


Re: [PATCH] RISC-V: Define __riscv_cmodel_medany for PIC mode.

2020-09-28 Thread Jim Wilson
On Thu, Sep 24, 2020 at 10:46 PM Kito Cheng  wrote:
>
>  - According to the conclusion in the RISC-V C API document, we decided
>to deprecate the __riscv_cmodel_pic macro
>
>  - __riscv_cmodel_pic is deprecated and will be removed in the next GCC
>release.

Looks good to me.  By the way, you can self approve patches like this.

Optionally, you might add a comment to the code to point out that
__riscv_cmodel_pic is deprecated.  That makes it a little easier to
understand the code.

Jim


Re: [PATCH] c++: Incomplete parameter mappings during normalization

2020-09-28 Thread Jason Merrill via Gcc-patches

On 9/25/20 4:44 PM, Patrick Palka wrote:

In the testcase concepts7.C below, we currently reject the call to f1
but we accept the call to f2, even though their associated constraints
are functionally equivalent.


Rather, because they are not functionally equivalent.


The reason satisfaction differs for (!!C<...>) and C<...> is due to normalization: the former is already an
atom, and the latter is not.  Normalization of the former yields itself,
whereas normalization of the latter yields the atom 'true' with an empty
parameter mapping (since the atom uses no template parameters).


Yes.


So when
building the latter atom we threw away the T::type term that would later
result in substitution failure during satisfaction.

However, [temp.constr.normal]/1 says:

   - ...
   - The normal form of a concept-id C<A1, A2, ..., An> is the normal
 form of the constraint-expression of C, after substituting A1, A2,
 ..., An for C's respective template parameters in the parameter
 mappings in each atomic constraint.
   - The normal form of any other expression E is the atomic constraint
 whose expression is E and whose parameter mapping is the identity
 mapping.

I believe these two bullet points imply that the atom 'true' in the
normal form of C should have the mapping R |-> T::type
instead of the empty mapping that we give it, because according to the
last bullet point, each atom should start out with the identity mapping
that includes all template parameters.


But [temp.constr.atomic] says,

An atomic constraint is formed from an expression E and a mapping from 
the template parameters that appear within E...


So the identity mapping only includes template parameters that appear 
within the expression, which in this case is none.


Also, the discussion of the example in [temp.constr.normal] says,

"Normalization of B’s constraint-expression is valid and results in 
T::value (with the mapping T → U*) V true (with an empty mapping)."
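
(For reference, the concepts in that example are, quoting the standard's
example from memory:

   template<typename T> concept A = T::value || true;
   template<typename U> concept B = A<U*>;
)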


So I think the testcase is already handled as specified.


This patch fixes this issue by always giving the first atom in the
normal form of each concept a 'complete' parameter mapping, i.e. one
that includes all template parameters.  I think it suffices to do this
only for the first atom so that we catch substitution failures like in
concepts7.C at the right time.  For the other atoms, their mappings can
continue to include only template parameters used in the atom.

I noticed that PR92268 alludes to this issue, so this patch refers to
that PR and adds the PR's first testcase which we now accept.

Is the above interpretation of the standard correct here?  If so, does
this seem like a good approach?

gcc/cp/ChangeLog:

PR c++/92268
* constraint.cc (build_parameter_mapping): Add a bool parameter
'complete'.  When 'complete' is true, then include all in-scope
template parameters in the mapping.
(norm_info::update_context): Pass false as the 'complete'
argument to build_parameter_mapping.
(norm_info::normalized_first_atom_p): New bool data member.
(normalize_logical_operation): Set info.normalized_first_atom_p
after normalizing the left operand.
(normalize_concept_check): Reset info.normalized_first_atom_p
before normalizing this concept.
(normalize_atom): Always give the first atom of a concept
definition a complete parameter mapping.

gcc/testsuite/ChangeLog:

PR c++/92268
* g++.dg/cpp2a/concepts-pr92268.C: New test.
* g++.dg/cpp2a/concepts-return-req1.C: Don't expect an error,
as the call is no longer ambiguous.
* g++.dg/cpp2a/concepts7.C: New test.
* g++.dg/cpp2a/concepts8.C: New test.
---
  gcc/cp/constraint.cc  | 44 ---
  gcc/testsuite/g++.dg/cpp2a/concepts-pr92268.C | 42 ++
  .../g++.dg/cpp2a/concepts-return-req1.C   |  2 +-
  gcc/testsuite/g++.dg/cpp2a/concepts7.C|  9 
  gcc/testsuite/g++.dg/cpp2a/concepts8.C| 25 +++
  5 files changed, 116 insertions(+), 6 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr92268.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts7.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts8.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index d49957a6c4a..729d02b73d7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -559,10 +559,14 @@ map_arguments (tree parms, tree args)
return parms;
  }
  
-/* Build the parameter mapping for EXPR using ARGS.  */

+/* Build the parameter mapping for EXPR using ARGS.
+
+   If COMPLETE then return the complete parameter mapping that includes
+   all in-scope template parameters.  Otherwise include only the
+   parameters used by EXPR.  */
  
  static tree

-build_parameter_mapping (tree expr, tree args, tree decl)
+build_parameter_mapping (tree expr, tree args, tree decl, bool complete)
  {
tree ctx_parms = NULL_TREE;
if 

Re: [PATCH] tree-optimization/97151 - improve PTA for C++ operator delete

2020-09-28 Thread Jason Merrill via Gcc-patches

On 9/28/20 3:56 AM, Richard Biener wrote:

On Fri, 25 Sep 2020, Jason Merrill wrote:


On 9/25/20 2:30 AM, Richard Biener wrote:

On Thu, 24 Sep 2020, Jason Merrill wrote:


On 9/24/20 3:43 AM, Richard Biener wrote:

On Wed, 23 Sep 2020, Jason Merrill wrote:


On 9/23/20 2:42 PM, Richard Biener wrote:

On September 23, 2020 7:53:18 PM GMT+02:00, Jason Merrill

wrote:

On 9/23/20 4:14 AM, Richard Biener wrote:

C++ operator delete, when DECL_IS_REPLACEABLE_OPERATOR_DELETE_P,
does not cause the deleted object to be escaped.  It also has no
other interesting side-effects for PTA so skip it like we do
for BUILT_IN_FREE.


Hmm, this is true of the default implementation, but since the function

is replaceable, we don't know what a user definition might do with the
pointer.


But can the object still be 'used' after delete? Can delete fail /
throw?

What guarantee does the predicate give us?


The deallocation function is called as part of a delete expression in
order
to
release the storage for an object, ending its lifetime (if it was not
ended
by
a destructor), so no, the object can't be used afterward.


OK, but the delete operator can access the object contents if there
wasn't a destructor ...



A deallocation function that throws has undefined behavior.


OK, so it seems the 'replaceable' operators are the global ones
(for user-defined/class-specific placement variants I see arbitrary
extra arguments that we'd possibly need to handle).

I'm happy to revert but I'd like to have a testcase that FAILs
with the patch ;)

Now, the following aborts:

struct X {
 static struct X saved;
 int *p;
 X() { __builtin_memcpy (this, &saved, sizeof (X)); }
};
void operator delete (void *p)
{
 __builtin_memcpy (&X::saved, p, sizeof (X));
}
int main()
{
 int y = 1;
 X *p = new X;
 p->p = &y;
 delete p;
 X *q = new X;
 *(q->p) = 2;
 if (y != 2)
   __builtin_abort ();
}

and I could fix this by not making *p but what *p points to escape.
The testcase is of course maximally awkward, but hey ... ;)

Now this would all be moot if operator delete may not access
the object (or if the object contents are undefined at that point).

Oh, and the testcase segfaults when compiled with GCC 10 because
there we elide the new X / delete p pair ... which is invalid then?
Hmm, we emit

 MEM[(struct X *)_8] ={v} {CLOBBER};
 operator delete (_8, 8);

so the object contents are undefined _before_ calling delete
even when I do not have a DTOR?  That is, the above,
w/o -fno-lifetime-dse, makes the PTA patch OK for the testcase.


Yes, all classes have a destructor, even if it's trivial, so the object's
lifetime definitely ends before the call to operator delete. This is less
clear for scalar objects, but treating them similarly would be consistent
with
other recent changes, so I think it's fine for us to assume that scalar
objects are also invalidated before the call to operator delete.  But of
course this doesn't apply to explicit calls to operator delete outside of a
delete expression.


OK, so change the testcase main slightly to

int main()
{
int y = 1;
X *p = new X;
p->p = &y;
::operator delete(p);
X *q = new X;
*(q->p) = 2;
if (y != 2)
  __builtin_abort ();
}

in this case the lifetime of *p does not end before calling
::operator delete() and delete can stash the object contents
somewhere before ending its lifetime.  For the very same reason
we may not elide a new/delete pair like in

int main()
{
int *p = new int;
*p = 1;
::operator delete (p);
}


Correct; the permission to elide new/delete pairs is for the expressions, not
the functions.


which we before the change did not do only because calling
operator delete made p escape.  Unfortunately points-to analysis
cannot really reconstruct whether delete was called as part of
a delete expression or directly (and thus whether object lifetime
ended already), neither can DCE.  So I guess we need to mark
the operator delete call in some way to make those transforms
safe.  At least currently any operator delete call makes the
alias guarantee of an operator new call moot by forcing the object
to be aliased with all global and escaped memory ...

Looks like there are some unallocated flags for CALL_EXPR we could
pick but I wonder if we can recycle protected_flag which is

 CALL_FROM_THUNK_P and
 CALL_ALLOCA_FOR_VAR_P in
 CALL_EXPR

for calls to DECL_IS_OPERATOR_{NEW,DELETE}_P, thus whether
we have CALL_FROM_THUNK_P for those operators.  Guess picking
a new flag is safer.


We won't ever call those operators from a thunk, so it should be OK to reuse
it.


But, does it seem correct that we need to distinguish
delete expressions from plain calls to operator delete?


A reason for that distinction came up in the context of omitting new/delete
pairs: we want to consider the operator first called by the new or delete
expression, not a call from that first operator to another operator 

Re: [PATCH v3] c++: Implement -Wrange-loop-construct [PR94695]

2020-09-28 Thread Jason Merrill via Gcc-patches

On 9/28/20 12:30 PM, Marek Polacek wrote:

On Sat, Sep 26, 2020 at 01:22:41AM -0400, Jason Merrill wrote:

+bool
+ref_conv_binds_directly_p (tree type, tree expr)
+{
+  gcc_assert (TYPE_REF_P (type));
+  conversion *conv = implicit_conversion (type, TREE_TYPE (expr), expr,
+ /*c_cast_p=*/false,
+ LOOKUP_IMPLICIT, tf_none);
+  return conv && !conv_binds_ref_to_prvalue (conv);


You probably want to free any allocated conversions, like in
can_convert_arg.


I ought to free them, indeed.  Apologies for missing that.  Fixed:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This new warning can be used to prevent expensive copies inside range-based
for-loops, for instance:

   struct S { char arr[128]; };
   void fn () {
 S arr[5];
 for (const auto x : arr) {  }
   }

where auto deduces to S and then we copy the big S in every iteration.
Using "const auto " would not incur such a copy.  With this patch the
compiler will warn:

q.C:4:19: warning: loop variable 'x' creates a copy from type 'const S' 
[-Wrange-loop-construct]
 4 |   for (const auto x : arr) {  }
   |   ^
q.C:4:19: note: use reference type 'const S&' to prevent copying
 4 |   for (const auto x : arr) {  }


It's unfortunate that we seem to suggest the unnecessary change from 
auto to S here.  Maybe just say "reference type" without printing the type?



   |   ^
   |   &

As per Clang, this warning is suppressed for trivially copyable types
whose size does not exceed 64B.  The tricky part of the patch was how
to figure out if using a reference would have prevented a copy.  To
that point, I'm using the new function called ref_conv_binds_directly_p.

This warning is enabled by -Wall.  Further warnings of similar nature
should follow soon.
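
For contrast, a case the 64B cutoff described above is meant to leave
quiet (my own illustration, not part of the testsuite):

   struct Small { char arr[64]; };
   void g () {
     Small a[5];
     for (const auto x : a) {  }  // trivially copyable, <= 64B: no warning
   }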

gcc/c-family/ChangeLog:

PR c++/94695
* c.opt (Wrange-loop-construct): New option.

gcc/cp/ChangeLog:

PR c++/94695
* call.c (ref_conv_binds_directly_p): New function.
* cp-tree.h (ref_conv_binds_directly_p): Declare.
* parser.c (warn_for_range_copy): New function.
(cp_convert_range_for): Call it.

gcc/ChangeLog:

PR c++/94695
* doc/invoke.texi: Document -Wrange-loop-construct.

gcc/testsuite/ChangeLog:

PR c++/94695
* g++.dg/warn/Wrange-loop-construct.C: New test.
---
  gcc/c-family/c.opt|   4 +
  gcc/cp/call.c |  22 ++
  gcc/cp/cp-tree.h  |   1 +
  gcc/cp/parser.c   |  68 +-
  gcc/doc/invoke.texi   |  21 +-
  .../g++.dg/warn/Wrange-loop-construct.C   | 207 ++
  6 files changed, 318 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 7761eefd203..bbf7da89658 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -800,6 +800,10 @@ Wpacked-not-aligned
  C ObjC C++ ObjC++ Var(warn_packed_not_aligned) Warning LangEnabledBy(C ObjC 
C++ ObjC++,Wall)
  Warn when fields in a struct with the packed attribute are misaligned.
  
+Wrange-loop-construct

+C++ ObjC++ Var(warn_range_loop_construct) Warning LangEnabledBy(C++ 
ObjC++,Wall)
+Warn when a range-based for-loop is creating unnecessary copies.
+
  Wredundant-tags
  C++ ObjC++ Var(warn_redundant_tags) Warning
  Warn when a class or enumerated type is referenced using a redundant 
class-key.
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 5606389f4bd..1e5fffe20ae 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -8429,6 +8429,28 @@ conv_binds_ref_to_prvalue (conversion *c)
return false;
  }
  
+/* True iff converting EXPR to a reference type TYPE does not involve

+   creating a temporary.  */
+
+bool
+ref_conv_binds_directly_p (tree type, tree expr)
+{
+  gcc_assert (TYPE_REF_P (type));
+
+  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
+  void *p = conversion_obstack_alloc (0);
+
+  conversion *conv = implicit_conversion (type, TREE_TYPE (expr), expr,
+ /*c_cast_p=*/false,
+ LOOKUP_IMPLICIT, tf_none);
+  bool ret = conv && !conv->bad_p && !conv_binds_ref_to_prvalue (conv);
+
+  /* Free all the conversions we allocated.  */
+  obstack_free (&conversion_obstack, p);
+
+  return ret;
+}
+
  /* Call the trivial destructor for INSTANCE, which can be either an lvalue of
 class type or a pointer to class type.  If NO_PTR_DEREF is true and
 INSTANCE has pointer type, clobber the pointer rather than what it points
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index b7f5b6b399f..303e3b53e9d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6225,6 +6225,7 @@ extern bool sufficient_parms_p
(const_tree);
  extern tree 

Re: [OG10] Backporting + merge of GCC 10 into branch + pre-applying (OpenMP: Handle cpp_implicit_alias in declare-target discovery)

2020-09-28 Thread Tobias Burnus

On 9/18/20 7:51 PM, Tobias Burnus wrote:


On 9/17/20 7:04 PM, Tobias Burnus wrote:

OG10 = devel/omp/gcc-10

Added additionally:


And now:

3a857fecdc2 Merge remote-tracking branch 'origin/releases/gcc-10' into 
devel/omp/gcc-10

845a9a25733 OpenMP: Handle cpp_implicit_alias in declare-target discovery 
(PR96390)
→ update previous patch for upstream review changes

9ea3f104a68 gomp/pr94874.c: Update scan-tree-dump

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


[patch] Do not use doloop pattern with pragma Unroll

2020-09-28 Thread Eric Botcazou
Hi,

this fixes the following Ada failure on 64-bit PowerPC:

-FAIL: gnat.dg/unroll4.adb scan-rtl-dump-times loop2_unroll "optimized: loop 
unrolled 7 times" 2

The IVOPTS pass detects a doloop pattern and consequently discombobulates the 
loop sufficiently as to make it hard for the RTL unrolling pass to compute the 
best number of iterations.

Tested on PowerPC64/Linux and x86-64/Linux, OK for the mainline?


2020-09-28  Eric Botcazou  

* tree-ssa-loop-ivopts.c (analyze_and_mark_doloop_use): Bail out if
the loop is subject to a pragma Unroll with no specific count.

-- 
Eric Botcazou

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 5acf044161c..da04dd5ba0a 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -7947,6 +7947,9 @@ analyze_and_mark_doloop_use (struct ivopts_data *data)
   if (!flag_branch_on_count_reg)
 return;
 
+  if (data->current_loop->unroll == USHRT_MAX)
+return;
+
   if (!generic_predict_doloop_p (data))
 return;
 


Re: [PATCH v3] c, c++: Implement -Wsizeof-array-div [PR91741]

2020-09-28 Thread Marek Polacek via Gcc-patches
On Tue, Sep 22, 2020 at 04:07:41PM -0400, Jason Merrill wrote:
> On 9/22/20 1:29 PM, Marek Polacek wrote:
> > Ping.
> 
> The C++ change is OK.

Ping for the C parts.

> > On Tue, Sep 15, 2020 at 04:33:05PM -0400, Marek Polacek via Gcc-patches 
> > wrote:
> > > On Tue, Sep 15, 2020 at 09:04:41AM +0200, Jakub Jelinek via Gcc-patches 
> > > wrote:
> > > > On Mon, Sep 14, 2020 at 09:30:44PM -0400, Marek Polacek via Gcc-patches 
> > > > wrote:
> > > > > --- a/gcc/c/c-tree.h
> > > > > +++ b/gcc/c/c-tree.h
> > > > > @@ -147,6 +147,11 @@ struct c_expr
> > > > >etc), so we stash a copy here.  */
> > > > > source_range src_range;
> > > > > +  /* True iff the sizeof expression was enclosed in parentheses.
> > > > > + NB: This member is currently only initialized when 
> > > > > .original_code
> > > > > + is a SIZEOF_EXPR.  ??? Add a default constructor to this class. 
> > > > >  */
> > > > > +  bool parenthesized_p;
> > > > > +
> > > > > /* Access to the first and last locations within the source 
> > > > > spelling
> > > > >of this expression.  */
> > > > > location_t get_start () const { return src_range.m_start; }
> > > > 
> > > > I think a magic tree code would be better, c_expr is used in too many 
> > > > places
> > > > and returned by many functions, so it is copied over and over.
> > > > Even if you must add it, it would be better to change the struct layout,
> > > > because right now there are fields: tree, location_t, tree, 
> > > > 2xlocation_t,
> > > > which means 32-bit gap on 64-bit hosts before the second tree, so the 
> > > > new
> > > > field would fit in there.  But, if it is mostly uninitialized, it is 
> > > > kind of
> > > > unclean.
> > > 
> > > Ok, here's a version with PAREN_SIZEOF_EXPR.  It doesn't require changes 
> > > to
> > > c_expr, but adding a new tree code is always a pain...
> > > 
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > -- >8 --
> > > This patch implements a new warning, -Wsizeof-array-div.  It warns about
> > > code like
> > > 
> > >int arr[10];
> > >sizeof (arr) / sizeof (short);
> > > 
> > > where we have a division of two sizeof expressions, where the first
> > > argument is an array, and the second sizeof does not equal the size
> > > of the array element.  See e.g. 
> > > .
> > > 
> > > Clang makes it possible to suppress the warning by parenthesizing the
> > > second sizeof like this:
> > > 
> > >sizeof (arr) / (sizeof (short));
> > > 
> > > so I followed suit.  In the C++ FE this was rather easy, because
> > > finish_parenthesized_expr already set TREE_NO_WARNING.  In the C FE
> > > I've added a new tree code, PAREN_SIZEOF_EXPR, to discern between the
> > > non-() and () versions.
> > > 
> > > This warning is enabled by -Wall.  An example of the output:
> > > 
> > > x.c:5:23: warning: expression does not compute the number of elements in 
> > > this array; element type is ‘int’, not ‘short int’ [-Wsizeof-array-div]
> > >  5 |   return sizeof (arr) / sizeof (short);
> > >|  ~^~~~
> > > x.c:5:25: note: add parentheses around ‘sizeof (short int)’ to silence 
> > > this warning
> > >  5 |   return sizeof (arr) / sizeof (short);
> > >| ^~
> > >| ( )
> > > x.c:4:7: note: array ‘arr’ declared here
> > >  4 |   int arr[10];
> > >|   ^~~
> > > 
> > > gcc/c-family/ChangeLog:
> > > 
> > >   PR c++/91741
> > >   * c-common.c (verify_tree): Handle PAREN_SIZEOF_EXPR.
> > >   (c_common_init_ts): Likewise.
> > >   * c-common.def (PAREN_SIZEOF_EXPR): New tree code.
> > >   * c-common.h (maybe_warn_sizeof_array_div): Declare.
> > >   * c-warn.c (sizeof_pointer_memaccess_warning): Unwrap NOP_EXPRs.
> > >   (maybe_warn_sizeof_array_div): New function.
> > >   * c.opt (Wsizeof-array-div): New option.
> > > 
> > > gcc/c/ChangeLog:
> > > 
> > >   PR c++/91741
> > >   * c-parser.c (c_parser_binary_expression): Implement -Wsizeof-array-div.
> > >   (c_parser_postfix_expression): Set PAREN_SIZEOF_EXPR.
> > >   (c_parser_expr_list): Handle PAREN_SIZEOF_EXPR like SIZEOF_EXPR.
> > >   * c-tree.h (char_type_p): Declare.
> > >   * c-typeck.c (char_type_p): No longer static.
> > > 
> > > gcc/cp/ChangeLog:
> > > 
> > >   PR c++/91741
> > >   * typeck.c (cp_build_binary_op): Implement -Wsizeof-array-div.
> > > 
> > > gcc/ChangeLog:
> > > 
> > >   PR c++/91741
> > >   * doc/invoke.texi: Document -Wsizeof-array-div.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   PR c++/91741
> > >   * c-c++-common/Wsizeof-pointer-div.c: Add dg-warning.
> > >   * c-c++-common/Wsizeof-array-div1.c: New test.
> > >   * g++.dg/warn/Wsizeof-array-div1.C: New test.
> > >   * g++.dg/warn/Wsizeof-array-div2.C: New test.
> > > ---
> > >   gcc/c-family/c-common.c   |  2 +
> > >   gcc/c-family/c-common.def |  3 +
> 

Re: [PATCH] c++: Implement -Wrange-loop-construct [PR94695]

2020-09-28 Thread Marek Polacek via Gcc-patches
On Fri, Sep 25, 2020 at 04:31:16PM -0600, Martin Sebor wrote:
> On 9/24/20 6:05 PM, Marek Polacek via Gcc-patches wrote:
> > This new warning can be used to prevent expensive copies inside range-based
> > for-loops, for instance:
> > 
> >struct S { char arr[128]; };
> >void fn () {
> >  S arr[5];
> >  for (const auto x : arr) {  }
> >}
> > 
> > where auto deduces to S and then we copy the big S in every iteration.
> > Using "const auto " would not incur such a copy.  With this patch the
> > compiler will warn:
> > 
> > q.C:4:19: warning: loop variable 'x' creates a copy from type 'const S' 
> > [-Wrange-loop-construct]
> >  4 |   for (const auto x : arr) {  }
> >|   ^
> > q.C:4:19: note: use reference type 'const S&' to prevent copying
> >  4 |   for (const auto x : arr) {  }
> >|   ^
> >|   &
> > 
> > As per Clang, this warning is suppressed for trivially copyable types
> > whose size does not exceed 64B.  The tricky part of the patch was how
> > to figure out if using a reference would have prevented a copy.  I've
> > used perform_implicit_conversion to perform the imaginary conversion.
> > Then if the conversion doesn't have any side-effects, I assume it does
> > not call any functions or create any TARGET_EXPRs, and is just a simple
> > assignment like this one:
> > 
   const T &x = (const T &) <__for_begin>;
> > 
> > But it can also be a CALL_EXPR:
> > 
> >x = (const T &) Iterator::operator* (&__for_begin)
> > 
> > which is still fine -- we just use the return value and don't create
> > any copies.
> > 
> > This warning is enabled by -Wall.  Further warnings of similar nature
> > should follow soon.
> 
> I've always thought a warning like this would be useful when passing
> large objects to functions by value.  Is adding one for these cases
> what you mean by future warnings?

No, but perhaps we should add it.  I don't know if we could still enable it by
-Wall.  We'd have to handle guaranteed copy elision and also the case when we
pass classes by invisible reference.  Unsure how much of the implementation
these warnings could share.

Do we have a request for the warning wrt passing chunky objects by value? 

As a user, I'd probably want to have the option of figuring out where I'm
copying large types, since it can be a performance issue.

> For the range loop, I wonder if more could be done to elide the copy
> and avoid the warning when it isn't really necessary.  For instance,
> for trivially copyable types like in your example, since x is const,
> modifying it would be undefined, and so when we can prove that
> the original object itself isn't modified (e.g., because it's
> declared const, or because it can't be accessed in the loop),
> there should be no need to make a copy on each iteration.  Using
> a reference to the original object should be sufficient.  Does C++
> rule out such an optimization?

Well, changing const auto x in

struct S { char arr[128]; S(); };

void
fn ()
{
  S a[5];
  for (const auto x : a)
decltype(x) k;
}

to const auto &x would break this code, since decltype(x) would then be a
reference type, which cannot be default-initialized.

> About the name of the option: my first thought was that it was
> about the construct known as the range loop, but after reading
> your description I wonder if it might actually primarily be about
> constructing expensive copies and the range loop is incidental.

It was a bit confusing to me too at first.  It's about constructing expensive
copies in range-based for-loops.  I don't think it's incidental that
it warns in loops only.

I'm not attached to the name but it's what Clang uses so we'll have to
follow.

> (It's impossible to tell from the Clang manual because its way
> of documenting warning options is to show examples of their text.)

Yes.  I really like that we provide code snippets showing what a warning
is supposed to warn on in our manual.  Let's keep it that way.

> Then again, I see it's related to -Wrange-loop-analysis so that
> suggests it is mainly about range loops, and that there may be
> a whole series of warnings and options related to it.  Can you
> please shed some light on that?  (E.g., what are some of
> the "further warnings of similar nature" about?)  I think it
> might also be helpful to expand the documentation a bit to help
> answer common questions (I came across the following post while
> looking it up: https://stackoverflow.com/questions/50066139).

I think right now it's like this (note the names and wording changed recently,
this is the latest version):

                 -> -Wfor-loop-analysis
                 |
-Wloop-analysis -|
                 |                          -> -Wrange-loop-bind-reference
                 |                          |
                 -> -Wrange-loop-analysis --|
                                            |
                                            -> -Wrange-loop-construct
   

* -Wloop-analysis and -Wrange-loop-analysis are umbrella flags.


[PATCH] Add type arg to TARGET_LIBC_HAS_FUNCTION

2020-09-28 Thread Tom de Vries
[ was: Re: [Patch][nvptx] return true in libc_has_function for
function_sincos ]

On 9/26/20 6:47 PM, Tobias Burnus wrote:
> Found when looking at PR97203 (but having no effect there).
> 
> The GCC ME optimizes with -O1 (or higher) the
>   a = sinf(x)
>   b = cosf(x)
> to __builtin_cexpi(x, &a, &b)
> (...i as in internal; like cexp(z) but with __real__ z == 0)
> 
> 
> In expand_builtin_cexpi, that is handled as:
>   if (optab_handler (sincos_optab, mode) != CODE_FOR_nothing)
>     ...
>   else if (targetm.libc_has_function (function_sincos))
>     ...
>   else
>     fn = builtin_decl_explicit (BUILT_IN_CEXPF);
> 
> And the latter is done. As newlib's cexpf does not know that
> __real__ z == 0, it calculates 'r = expf (__real__ z)' before
> invoking sinf and cosf on __imag__ z.
> 
> Thus, it is much faster to call 'sincosf', which also exists
> in newlib.
> 
> Solution: Return true for targetm.libc_has_function (function_sincos).
> 
> 
> NOTE: With -funsafe-math-optimizations (-O0 or higher),
> sinf/cosf and sincosf invoke .sin.approx/.cos.approx instead of
> doing a library call.
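
Concretely, the pattern being combined is of this shape (an illustration
only, not from the patch):

  #include <math.h>
  extern float a, b;
  void compute (float x)
  {
    a = sinf (x);   /* same argument in both calls, so the middle end    */
    b = cosf (x);   /* folds them into a single cexpi/sincosf expansion. */
  }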

This version takes care to enable sincos and sincosf, but not sincosl.

Target hook changes OK for trunk?

Thanks,
- Tom
[nvptx] Add type arg to TARGET_LIBC_HAS_FUNCTION

GCC has a target hook TARGET_LIBC_HAS_FUNCTION, which tells the compiler
which functions it can expect to be present in libc.

The default target hook does not include the sincos functions.

The nvptx port of newlib does include sincos and sincosf, but not sincosl.

The target hook TARGET_LIBC_HAS_FUNCTION does not distinguish between sincos,
sincosf and sincosl, so if we enable it for the sincos functions, then for
test.c:
...
long double x, a, b;
int main (void) {
  x = 0.5;
  a = sinl (x);
  b = cosl (x);
  printf ("a: %f\n", (double)a);
  printf ("b: %f\n", (double)b);
  return 0;
}
...
we introduce a regression:
...
$ gcc test.c -lm -O2
unresolved symbol sincosl
collect2: error: ld returned 1 exit status
...

Add a type argument to target hook TARGET_LIBC_HAS_FUNCTION, and use it
in nvptx_libc_has_function to enable sincos and sincosf, but not sincosl.

For now, a non-null type argument is only supported for
fn_class == function_sincos.
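
The hook then has roughly this shape (an illustrative sketch; the real
nvptx_libc_has_function is in the patch):

  static bool
  nvptx_libc_has_function (enum function_class fn_class, tree type)
  {
    if (fn_class == function_sincos)
      /* nvptx newlib provides sincos and sincosf, but not sincosl.  */
      return (type == NULL_TREE
	      || type == float_type_node
	      || type == double_type_node);

    return default_libc_has_function (fn_class, type);
  }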

Tested on nvptx.

2020-09-28  Tobias Burnus  
	Tom de Vries  

	* builtins.c (expand_builtin_cexpi, fold_builtin_sincos): Update
	targetm.libc_has_function call.
	* builtins.def (DEF_C94_BUILTIN, DEF_C99_BUILTIN, DEF_C11_BUILTIN):
	(DEF_C2X_BUILTIN, DEF_C99_COMPL_BUILTIN, DEF_C99_C90RES_BUILTIN):
	Same.
	* config/darwin-protos.h (darwin_libc_has_function): Update prototype.
	* config/darwin.c (darwin_libc_has_function): Add arg.
	* config/linux-protos.h (linux_libc_has_function): Update prototype.
	* config/linux.c (linux_libc_has_function): Add arg.
	* config/i386/i386.c (ix86_libc_has_function): Update
	targetm.libc_has_function call.
	* config/nvptx/nvptx.c (nvptx_libc_has_function): New function.
	(TARGET_LIBC_HAS_FUNCTION): Redefine to nvptx_libc_has_function.
	* convert.c (convert_to_integer_1): Update targetm.libc_has_function
	call.
	* match.pd: Same.
	* target.def (libc_has_function): Add arg.
	* doc/tm.texi: Regenerate.
	* targhooks.c (default_libc_has_function, gnu_libc_has_function)
	(no_c99_libc_has_function): Add arg.
	* targhooks.h (default_libc_has_function,  no_c99_libc_has_function)
	(gnu_libc_has_function): Update prototype.
	* tree-ssa-math-opts.c (pass_cse_sincos::execute): Update
	targetm.libc_has_function call.

---
 gcc/builtins.c |  4 ++--
 gcc/builtins.def   | 20 ++--
 gcc/config/darwin-protos.h |  2 +-
 gcc/config/darwin.c| 16 ++--
 gcc/config/i386/i386.c |  2 +-
 gcc/config/linux-protos.h  |  2 +-
 gcc/config/linux.c | 16 ++--
 gcc/config/nvptx/nvptx.c   | 20 
 gcc/convert.c  |  8 
 gcc/doc/tm.texi|  5 +++--
 gcc/fortran/f95-lang.c |  4 ++--
 gcc/match.pd   |  6 +++---
 gcc/target.def |  5 +++--
 gcc/targhooks.c| 44 +---
 gcc/targhooks.h|  6 +++---
 gcc/tree-ssa-math-opts.c   |  8 +---
 16 files changed, 131 insertions(+), 37 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index cac842fd4a3..85f5f630a0f 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2733,7 +2733,7 @@ expand_builtin_cexpi (tree exp, rtx target)
   /* Compute into op1 and op2.  */
   expand_twoval_unop (sincos_optab, op0, op2, op1, 0);
 }
-  else if (targetm.libc_has_function (function_sincos))
+  else if (targetm.libc_has_function (function_sincos, type))
 {
   tree call, fn = NULL_TREE;
   tree top1, top2;
@@ -9770,7 +9770,7 @@ fold_builtin_sincos (location_t loc,
 }
   if (!call)
 {
-  if (!targetm.libc_has_function (function_c99_math_complex)
+  if (!targetm.libc_has_function (function_c99_math_complex, NULL_TREE)
 	  || 

RE: [PATCH v2 3/16]middle-end Add basic SLP pattern matching scaffolding.

2020-09-28 Thread Tamar Christina



> -Original Message-
> From: Gcc-patches  On Behalf Of Tamar
> Christina
> Sent: Monday, September 28, 2020 3:56 PM
> To: Richard Biener 
> Cc: nd ; gcc-patches@gcc.gnu.org; o...@ucw.cz
> Subject: RE: [PATCH v2 3/16]middle-end Add basic SLP pattern matching
> scaffolding.
> 
> Hi Richi,
> 
> Thanks for the review!
> 
> Just some answers to your questions:
> 
> > -Original Message-
> > From: rguent...@c653.arch.suse.de  On
> > Behalf Of Richard Biener
> > Sent: Monday, September 28, 2020 1:37 PM
> > To: Tamar Christina 
> > Cc: gcc-patches@gcc.gnu.org; nd ; o...@ucw.cz
> > Subject: Re: [PATCH v2 3/16]middle-end Add basic SLP pattern matching
> > scaffolding.
> >
> > On Fri, 25 Sep 2020, Tamar Christina wrote:
> >
> > > Hi All,
> > >
> > > This patch adds the basic infrastructure for doing pattern matching
> > > on SLP
> > trees.
> > > This is done immediately after the SLP tree creation because it can
> > > change the shape of the tree in radical ways and so we would like to
> > > do it before any analysis is performed on the tree.
> > >
> > > A new file tree-vect-slp-patterns.c is added which contains all the
> > > code for pattern matching on SLP trees.
> > >
> > > This cover letter is short because the changes are heavily commented.
> > >
> > > All pattern matchers need to implement the abstract type
> > VectPatternMatch.
> > > The VectSimplePatternMatch abstract class provides some default
> > > functionality for pattern matchers that need to rebuild nodes.
> > >
> > > The pattern matcher requires if replacing a statement in a node,
> > > that ALL statements be replaced.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master?
> >
> > +gcall *build ()
> > +{
> > +  stmt_vec_info stmt_info;
> > +
> >
> > please define functions out-of-line (apart from the 1-liners)
> >
> > +  /* We have to explicitly mark the old statement as unused
> > + because
> > during
> > +statement analysis the original and new pattern statement may
> > require
> > +different level of unrolling.  As an example add/sub when
> > vectorized
> > +without a pattern requires 4 copies, whereas with a
> > + COMPLEX_ADD
> > pattern
> > +this only requires 2 copies and the two statement will be
> > + treated
> > as
> > +hand unrolled.  That means that the analysis won't happen as
> > it'll find
> > +a mismatch.  So we don't analyze the old statement and if we
> > + end
> > up
> > +needing it, e.g. SLP fails then we have to quickly re-analyze it.
> > */
> > +  STMT_VINFO_RELEVANT (stmt_info) = vect_unused_in_scope;
> > +  STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true;
> > +  STMT_VINFO_RELATED_STMT (call_stmt_info) = stmt_info;
> >
> > so this means all uses have to be inside the pattern as otherwise
> > there may be even non-SLP uses.  vect_mark_pattern_stmts supports
> > detecting patterns of patterns, I suppose the two-phase analysis for
> > SLP patterns does not support this right now?
> >
> > +  SLP_TREE_CODE (this->m_node) = gimple_expr_code (call_stmt);;
> >
> > double ;, just make it CALL_EXPR literally (or leave it ERROR_MARK)
> >
> > You seem to do in-place changing of the SLP node you match off?
> 
> Yes since this would allow me to change the root node as well, though
> thinking about it I can probably do it by passing it as a reference which then
> would allow me to re-use vect_create_new_slp_node which is probably
> preferable.
> 
> >
> > @@ -2192,6 +2378,17 @@ vect_analyze_slp_instance (vec_info *vinfo,
> >   _size, bst_map);
> >if (node != NULL)
> >  {
> > +  /* Temporarily allow add_stmt calls again.  */
> > +  vinfo->stmt_vec_info_ro = false;
> > +
> > +  /* See if any patterns can be found in the constructed SLP tree
> > +before we do any analysis on it.  */
> > +  vect_match_slp_patterns (node, vinfo, group_size, _nunits,
> > +  matches, , _size,
> > + bst_map);
> > +
> > +  /* After this no more add_stmt calls are allowed.  */
> > +  vinfo->stmt_vec_info_ro = true;
> > +
> >
> > I think this is a bit early to match patterns - I'd defer it to the
> > point where all entries into the same SLP subgraph are analyzed, thus
> > somewhere at the end of vect_analyze_slp loop over all instances and
> > match patterns?  That way phases are more clearly separated.
> 
> That would probably work, my only worry is that the SLP analysis itself may
> fail and bail out at
> 
> /* If the loads and stores can be handled with load/store-lane
>instructions do not generate this SLP instance.  */
> if (is_a  (vinfo)
> && loads_permuted
> && dr && vect_store_lanes_supported (vectype, group_size,
> false))
> 
> Which in the initial tree may be true, but in the patterned tree may not be.
> In the previous revision of the patch you had suggested I 

[committed] aarch64: Fix ordering of aarch64-cores.def

2020-09-28 Thread Alex Coplan
Hi all,

This patch moves the entry for Neoverse N2 (an Armv8.5-A CPU) after Saphira (an
Armv8.4-A CPU) to preserve the overall ordering in the file.

Committing as obvious.

Thanks,
Alex

---

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def: Move neoverse-n2 after saphira.
* config/aarch64/aarch64-tune.md: Regenerate.
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 469ee99824c..52234810a08 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -136,12 +136,12 @@ AARCH64_CORE("thunderx3t110",  thunderx3t110,  
thunderx3t110, 8_3A,  AARCH64_FL_
 AARCH64_CORE("zeus", zeus, cortexa57, 8_4A,  AARCH64_FL_FOR_ARCH8_4 | 
AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, 
neoversen1, 0x41, 0xd40, -1)
 AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | 
AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | 
AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1)
 
-/* Armv8.5-A Architecture Processors.  */
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, 
AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | 
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG, neoversen1, 0x41, 0xd49, -1)
-
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira", saphira,saphira,8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 
0xC01, -1)
 
+/* Armv8.5-A Architecture Processors.  */
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, 
AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | 
AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | 
AARCH64_FL_MEMTAG, neoversen1, 0x41, 0xd49, -1)
+
 /* ARMv8-A big.LITTLE implementations.  */
 
 AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  
AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE 
(0xd07, 0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 3cf69ceadaf..bb5d8da6564 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoversen2,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))


Re: [PATCH v2] builtins: rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2020-09-28 Thread will schmidt via Gcc-patches
On Fri, 2020-09-04 at 12:52 -0300, Raoni Fassina Firmino via Gcc-patches wrote:
> Changes since v1[1]:
>   - Fixed english spelling;
>   - Fixed code-style;
>   - Changed match operand predicate in feclearexcept and feraiseexcept;
>   - Changed testcase options;
>   - Minor changes in test code to be C90 compatible;
>   - Other minor changes sugested by Segher;
>   - Changed subject line tag (not sure if I tagged correctly or should
> include optabs: also)
> 
> There is one pending question raised by Segher, It is about adding
> documentation, I am not sure if it is needed and if so, where it
> should be. I will quote the relevant part of the conversation[2] from
> the v1 thread for context:
> 
>   > > > +OPTAB_D (fegetround_optab, "fegetround$a")
>   > > > +OPTAB_D (feclearexcept_optab, "feclearexcept$a")
>   > > > +OPTAB_D (feraiseexcept_optab, "feraiseexcept$a")
>   > >
>   > > Should those be documented somewhere?  (In gcc/doc/ somewhere).
>   >
>   > I am lost on that one. I took a look on the docs (I hope looking on the
>   > online docs was good enough) and I didn't find a place where i feel it
>   > sits well. On the PowerPC target specific sections (6.60.22 Basic
>   > PowerPC Built-in Functions), I didn't found it mentioning builtins that
>   > are optimizations for the standard library functions, but we do have
>   > many of these for Power.  Then, on the generic section (6.59 Other
>   > Built-in Functions Provided by GCC) it mentions C99 functions that have
>   > builtins but it seems like it mentions builtins that have target
>   > independent implementation, or at least it dos not say that some
>   > builtins may be implemented on only some targets.  And in this case
>   > there is no implementation (for now) for any other target that is not
>   > PowerPc.
>   >
>   > So, I don't know if or where this should be documented.
> 
> tested on top of master (c5a6c2237a1156dc43fa32c938fc32acdfc2bff9)
> on the following plataforms with no regression:
>   - powerpc64le-linux-gnu (Power 9)
>   - powerpc64le-linux-gnu (Power 8)
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552024.html
> [2] https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553295.html
> 
>  8< 
> 
> This optimizations were originally in glibc, but was removed
> and sugested that they were a good fit as gcc builtins[1].
> 
> The associated bugreport: PR target/94193
> 
> [1] https://sourceware.org/legacy-ml/libc-alpha/2020-03/msg00047.html
> https://sourceware.org/legacy-ml/libc-alpha/2020-03/msg00080.html
> 
> 2020-08-13  Raoni Fassina Firmino  
> 
> gcc/ChangeLog:
> 
>   * builtins.c (expand_builtin_fegetround): New function.
>   (expand_builtin_feclear_feraise_except): New function.
>   (expand_builtin): Add cases for BUILT_IN_FEGETROUND,
>   BUILT_IN_FECLEAREXCEPT and BUILT_IN_FERAISEEXCEPT
>   * config/rs6000/rs6000.md (fegetroundsi): New pattern.
>   (feclearexceptsi): New Pattern.
>   (feraiseexceptsi): New Pattern.
>   * optabs.def (fegetround_optab): New optab.
>   (feclearexcept_optab): New optab.
>   (feraiseexcept_optab): New optab.

> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-1.c: New test.
>   * gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c: New test.
>   * gcc.target/powerpc/builtin-fegetround.c: New test.
> 

A few cosmetic nits below... this is perhaps enough activity to get
others to take a look.  :-) 


> Signed-off-by: Raoni Fassina Firmino 
> 
> FIX: patch_v1 (2)
> ---
>  gcc/builtins.c|  76 ++
>  gcc/config/rs6000/rs6000.md   |  82 +++
>  gcc/optabs.def|   4 +
>  .../builtin-feclearexcept-feraiseexcept-1.c   |  68 +
>  .../builtin-feclearexcept-feraiseexcept-2.c   | 133 ++
>  .../gcc.target/powerpc/builtin-fegetround.c   |  36 +
>  6 files changed, 399 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/builtin-feclearexcept-feraiseexcept-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtin-fegetround.c
> 
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 97f1a184dc6..a6f6141edb7 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -115,6 +115,9 @@ static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
>  static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
>  static rtx expand_builtin_interclass_mathfn (tree, rtx);
>  static rtx expand_builtin_sincos (tree);
> +static rtx expand_builtin_fegetround (tree, rtx, machine_mode);
> +static rtx expand_builtin_feclear_feraise_except (tree, rtx, machine_mode,
> +   optab);


>  static rtx expand_builtin_cexpi (tree, rtx);
>  static rtx expand_builtin_int_roundingfn (tree, rtx);
>  static rtx 

Re: [PATCH v3] c++: Implement -Wrange-loop-construct [PR94695]

2020-09-28 Thread Marek Polacek via Gcc-patches
On Sat, Sep 26, 2020 at 01:22:41AM -0400, Jason Merrill wrote:
> > +bool
> > +ref_conv_binds_directly_p (tree type, tree expr)
> > +{
> > +  gcc_assert (TYPE_REF_P (type));
> > +  conversion *conv = implicit_conversion (type, TREE_TYPE (expr), expr,
> > + /*c_cast_p=*/false,
> > + LOOKUP_IMPLICIT, tf_none);
> > +  return conv && !conv_binds_ref_to_prvalue (conv);
> 
> You probably want to free any allocated conversions, like in
> can_convert_arg.

I ought to free them, indeed.  Apologies for missing that.  Fixed:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This new warning can be used to prevent expensive copies inside range-based
for-loops, for instance:

  struct S { char arr[128]; };
  void fn () {
S arr[5];
for (const auto x : arr) {  }
  }

where auto deduces to S and then we copy the big S in every iteration.
Using "const auto " would not incur such a copy.  With this patch the
compiler will warn:

q.C:4:19: warning: loop variable 'x' creates a copy from type 'const S' 
[-Wrange-loop-construct]
4 |   for (const auto x : arr) {  }
  |   ^
q.C:4:19: note: use reference type 'const S&' to prevent copying
4 |   for (const auto x : arr) {  }
  |   ^
  |   &

As per Clang, this warning is suppressed for trivially copyable types
whose size does not exceed 64B.  The tricky part of the patch was how
to figure out if using a reference would have prevented a copy.  To
that point, I'm using the new function called ref_conv_binds_directly_p.

This warning is enabled by -Wall.  Further warnings of similar nature
should follow soon.

gcc/c-family/ChangeLog:

PR c++/94695
* c.opt (Wrange-loop-construct): New option.

gcc/cp/ChangeLog:

PR c++/94695
* call.c (ref_conv_binds_directly_p): New function.
* cp-tree.h (ref_conv_binds_directly_p): Declare.
* parser.c (warn_for_range_copy): New function.
(cp_convert_range_for): Call it.

gcc/ChangeLog:

PR c++/94695
* doc/invoke.texi: Document -Wrange-loop-construct.

gcc/testsuite/ChangeLog:

PR c++/94695
* g++.dg/warn/Wrange-loop-construct.C: New test.
---
 gcc/c-family/c.opt|   4 +
 gcc/cp/call.c |  22 ++
 gcc/cp/cp-tree.h  |   1 +
 gcc/cp/parser.c   |  68 +-
 gcc/doc/invoke.texi   |  21 +-
 .../g++.dg/warn/Wrange-loop-construct.C   | 207 ++
 6 files changed, 318 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 7761eefd203..bbf7da89658 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -800,6 +800,10 @@ Wpacked-not-aligned
 C ObjC C++ ObjC++ Var(warn_packed_not_aligned) Warning LangEnabledBy(C ObjC 
C++ ObjC++,Wall)
 Warn when fields in a struct with the packed attribute are misaligned.
 
+Wrange-loop-construct
+C++ ObjC++ Var(warn_range_loop_construct) Warning LangEnabledBy(C++ 
ObjC++,Wall)
+Warn when a range-based for-loop is creating unnecessary copies.
+
 Wredundant-tags
 C++ ObjC++ Var(warn_redundant_tags) Warning
 Warn when a class or enumerated type is referenced using a redundant class-key.
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 5606389f4bd..1e5fffe20ae 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -8429,6 +8429,28 @@ conv_binds_ref_to_prvalue (conversion *c)
   return false;
 }
 
+/* True iff converting EXPR to a reference type TYPE does not involve
+   creating a temporary.  */
+
+bool
+ref_conv_binds_directly_p (tree type, tree expr)
+{
+  gcc_assert (TYPE_REF_P (type));
+
+  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
+  void *p = conversion_obstack_alloc (0);
+
+  conversion *conv = implicit_conversion (type, TREE_TYPE (expr), expr,
+ /*c_cast_p=*/false,
+ LOOKUP_IMPLICIT, tf_none);
+  bool ret = conv && !conv->bad_p && !conv_binds_ref_to_prvalue (conv);
+
+  /* Free all the conversions we allocated.  */
+  obstack_free (&conversion_obstack, p);
+
+  return ret;
+}
+
 /* Call the trivial destructor for INSTANCE, which can be either an lvalue of
class type or a pointer to class type.  If NO_PTR_DEREF is true and
INSTANCE has pointer type, clobber the pointer rather than what it points
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index b7f5b6b399f..303e3b53e9d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6225,6 +6225,7 @@ extern bool sufficient_parms_p
(const_tree);
 extern tree type_decays_to (tree);
 extern tree extract_call_expr  (tree);
 extern tree build_trivial_dtor_call(tree, bool = false);
+extern bool ref_conv_binds_directly_p 

Re: [PATCH] Add Missing FSF copyright notes for some x86 intrinsic headers

2020-09-28 Thread H.J. Lu via Gcc-patches
On Mon, Sep 28, 2020 at 9:04 AM Hongyu Wang via Gcc-patches
 wrote:
>
> Hi,
>
> Some x86 intrinsic headers are missing FSF copyright notices. This patch adds
> the missing notices for those headers.
>
> OK for master?
>
> gcc/ChangeLog:
>
> * config/i386/amxbf16intrin.h: Add FSF copyright notes.
> * config/i386/amxint8intrin.h: Ditto.
> * config/i386/amxtileintrin.h: Ditto.
> * config/i386/avx512vp2intersectintrin.h: Ditto.
> * config/i386/avx512vp2intersectvlintrin.h: Ditto.
> * config/i386/pconfigintrin.h: Ditto.
> * config/i386/tsxldtrkintrin.h: Ditto.
> * config/i386/wbnoinvdintrin.h: Ditto.
>

I will check it in for Hongyu tomorrow if there are no objections.

Thanks.

-- 
H.J.


RE: [PATCH v2 5/16]middle-end: Add shared machinery for matching patterns involving complex numbers.

2020-09-28 Thread Tamar Christina
Hi Richi,

> -Original Message-
> From: rguent...@c653.arch.suse.de  On
> Behalf Of Richard Biener
> Sent: Monday, September 28, 2020 2:22 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; o...@ucw.cz
> Subject: Re: [PATCH v2 5/16]middle-end: Add shared machinery for matching
> patterns involving complex numbers.
> 
> On Fri, 25 Sep 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This patch adds shared machinery for detecting patterns having to do
> > with complex number operations.  The class ComplexPattern provides
> > helpers for matching and ultimately undoing the permutation in the
> > tree by rebuilding the graph.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> 
> I think you want to change all this to not look at individual
> stmts:
> 
> +vect_match_expression_p (slp_tree node, tree_code code, int base,
> + int
> idx,
> +stmt_vec_info *op1, stmt_vec_info *op2)
> +{
> +
> +  vec scalar_stmts = SLP_TREE_SCALAR_STMTS (node);
> +
> 
> _all_ lanes are supposed to match the operation in
> SLP_TREE_REPRESENTATIVE there's no need to do any operation-matching
> on lane granularity.
> 

That's fair; that flexibility indeed won't work, since the statements are
vectorized based on SLP_TREE_REPRESENTATIVE anyway.  So I'll simplify it.

> The only thing making a difference here is VEC_PERM_EXPR nodes where
> again - there's no need to look at (eventually non-existant!)
> SLP_TREE_SCALAR_STMTS but its SLP_TREE_REPRESENTATIVE.
> 
> +  vec children = SLP_TREE_CHILDREN (node);
> +
> +  /* If it's a VEC_PERM_EXPR we need to look one deeper.
> VEC_PERM_EXPR
> +only have one entry.  So pick on.  */
> +  if (node->code == VEC_PERM_EXPR)
> +   children = SLP_TREE_CHILDREN (children.last ());
> 
> that's too simplistic ;)
> 
> + *op1 = SLP_TREE_SCALAR_STMTS (children[0])[n];
> 
> please make op1,op2 slp_tree, not stmt_vec_info.
> 
> Where do you look at SLP_TREE_LANE_PERMUTATION at all?  I think the
> result of
> 

Here I have to admit that I am a bit confused as to the relation between
the different permute fields.  LOAD permutes are quite straightforward;
LANE permutes, I think, are shuffles/explicit permutes.

But then I am lost as to the purpose of VEC_PERM_EXPR nodes. Is it just a 
marker to indicate that some node below
has a load permute somewhere?

> +vect_detect_pair_op (int base, slp_tree node1, int offset1,
> + slp_tree
> node2,
> +int offset2, vec *ops)
> 
> could be simply the SLP_TREE_LANE_PERMUTATION? plus its two child
> nodes?

Right, if I understood correctly, in the two_operands case the lane permute
would tell me whether it's + or -, and in the non-two_operands cases I
probably want to check that it's vNULL, since any permute in the order
changes how the instruction accepts the inputs?

> 
> In the ComplexAddPattern patch I see
> 
> +  /* Correct the arguments after matching.  */
> +  std::swap (this->m_vects[1], this->m_vects[3]);
> 
> how's that necessary?  The replacement SLP node should end up with just a
> SLP_TREE_REPRESENTATIVE (the IFN function call).
> That is, the only thing necessary is verification / matching of the 
> appropriate
> "interleaving" scheme imposed by SLP_TREE_LANE_PERMUTATION.

But the order of the statements is important, as it decides the 
LOAD_PERMUTATION
that build_slp_tree creates. 

So in this case the original statement is

   stmt 0 _39 = _37 + _12;
   stmt 1 _6 = _38 - _36;

{_12, _36} result in a LOAD_PERMUTATION of {1, 0} because of how the addition 
is done.
So to undo the LOAD_PERMUTE it has to build the new children using

   stmt 0 _39 = {_37, _36};
   stmt 1 _6 = {_38, _12};

So the scalar statements are purely a re-ordering to get it to rebuild the 
children correctly.

Now ADD is simplistic; the more complicated cases like MUL and FMA use this
property to also rebuild the tree, removing unneeded statements.  This method
returns these and stores them in the PatternMatch class, so I don't have to
ask for them again later when replacing the nodes.  Even for
SLP_TREE_REPRESENTATIVE, don't the arguments in the IFN call need to be in
such a way that they are both in the same place in the load chain?

> I guess things would go wrong if instead of effectively blending two vectors
> we'd interleave two smaller vector type vectors?  Say a 4-way add and a 4-
> way sub interleaved into a 8-way addsub, using V4SF and V8SF vectors?
> 

At this stage yes, most likely, but it should be rejected at the validate level.

What is also currently rejected is when some of the definitions are external,
which I think I should be able to handle. I'll fix that up with the rest of the 
changes.


Thanks for the feedback!

Regards,
Tamar

> Going to stop looking at the series at this point, one further note is that 
> all of
> the Complex*Patterns seem to share the 

[PATCH] Add Missing FSF copyright notes for some x86 intrinsic headers

2020-09-28 Thread Hongyu Wang via Gcc-patches
Hi,

Some x86 intrinsic headers are missing FSF copyright notices. This patch adds
the missing notices for those headers.

OK for master?

gcc/ChangeLog:

* config/i386/amxbf16intrin.h: Add FSF copyright notes.
* config/i386/amxint8intrin.h: Ditto.
* config/i386/amxtileintrin.h: Ditto.
* config/i386/avx512vp2intersectintrin.h: Ditto.
* config/i386/avx512vp2intersectvlintrin.h: Ditto.
* config/i386/pconfigintrin.h: Ditto.
* config/i386/tsxldtrkintrin.h: Ditto.
* config/i386/wbnoinvdintrin.h: Ditto.

-- 
Regards,

Hongyu, Wang
From ec6263ba1d74953721dd274c301bdeeeb71d5e77 Mon Sep 17 00:00:00 2001
From: Hongyu Wang 
Date: Mon, 28 Sep 2020 22:22:28 +
Subject: [PATCH] Add missing FSF copyright notes for x86 intrinsic headers.

gcc/ChangeLog:

	* config/i386/amxbf16intrin.h: Add FSF copyright notes.
	* config/i386/amxint8intrin.h: Ditto.
	* config/i386/amxtileintrin.h: Ditto.
	* config/i386/avx512vp2intersectintrin.h: Ditto.
	* config/i386/avx512vp2intersectvlintrin.h: Ditto.
	* config/i386/pconfigintrin.h: Ditto.
	* config/i386/tsxldtrkintrin.h: Ditto.
	* config/i386/wbnoinvdintrin.h: Ditto.
---
 gcc/config/i386/amxbf16intrin.h  | 23 
 gcc/config/i386/amxint8intrin.h  | 23 
 gcc/config/i386/amxtileintrin.h  | 23 
 gcc/config/i386/avx512vp2intersectintrin.h   | 23 
 gcc/config/i386/avx512vp2intersectvlintrin.h | 23 
 gcc/config/i386/pconfigintrin.h  | 23 
 gcc/config/i386/tsxldtrkintrin.h | 23 
 gcc/config/i386/wbnoinvdintrin.h | 23 
 8 files changed, 184 insertions(+)

diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h
index b1620963944..77cc395e86d 100644
--- a/gcc/config/i386/amxbf16intrin.h
+++ b/gcc/config/i386/amxbf16intrin.h
@@ -1,3 +1,26 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
 #if !defined _IMMINTRIN_H_INCLUDED
 #error "Never use  directly; include  instead."
 #endif
diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h
index 11adc1f1295..f4e410b6647 100644
--- a/gcc/config/i386/amxint8intrin.h
+++ b/gcc/config/i386/amxint8intrin.h
@@ -1,3 +1,26 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
 #if !defined _IMMINTRIN_H_INCLUDED
 #error "Never use  directly; include  instead."
 #endif
diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
index e78e5c04909..41fb9a5d86a 100644
--- a/gcc/config/i386/amxtileintrin.h
+++ b/gcc/config/i386/amxtileintrin.h
@@ -1,3 +1,26 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without 

Re: Fix handling of gimple_clobber in ipa_modref

2020-09-28 Thread Jan Hubicka
> Hi Honza,
> 
> 
> On Sat, 26 Sep 2020 at 22:03, Jan Hubicka  wrote:
> >
> > > On September 26, 2020 12:04:24 AM GMT+02:00, Jan Hubicka  
> > > wrote:
> > > >Hi,
> > > >while adding check for gimple_clobber I reversed the return value
> > > >so instead of ignoring the statement ipa-modref gives up.  Fixed thus.
> > > >This explains the drop between originally reported disambiguations
> > > >stats and ones I got later.
> > >
> > > I don't think you can ignore clobbers. They are barriers for code motion.
> >
> > Hi,
> > this is fix I have installed after lto-bootstrapping/regtesting.
> > The statistics for cc1plus are almost unchanged that is sort of expected
> > given that I only measure late optimization by getting dump from LTO.
> >
> > Thanks for pointing this out, it may have triggered a nasty wrong code
> > bug :)
> >
> > Honza
> >
> > Alias oracle query stats:
> >   refs_may_alias_p: 63013346 disambiguations, 73204989 queries
> >   ref_maybe_used_by_call_p: 141350 disambiguations, 63909728 queries
> >   call_may_clobber_ref_p: 23597 disambiguations, 29430 queries
> >   nonoverlapping_component_refs_p: 0 disambiguations, 37763 queries
> >   nonoverlapping_refs_since_match_p: 19444 disambiguations, 55671 must 
> > overlaps, 75884 queries
> >   aliasing_component_refs_p: 54749 disambiguations, 753947 queries
> >   TBAA oracle: 24159888 disambiguations 56277876 queries
> >16064485 are in alias set 0
> >10340953 queries asked about the same object
> >125 queries asked about the same alias set
> >0 access volatile
> >3920604 are dependent in the DAG
>1791821 are artificially in conflict with void *
> >
> > Modref stats:
> >   modref use: 10444 disambiguations, 46994 queries
> >   modref clobber: 1421468 disambiguations, 1954304 queries
> >   4907798 tbaa queries (2.511277 per modref query)
> >   396785 base compares (0.203031 per modref query)
> >
> > PTA query stats:
> >   pt_solution_includes: 976073 disambiguations, 13607833 queries
> >   pt_solutions_intersect: 1026016 disambiguations, 13185678 queries
> >
> > * ipa-modref.c (analyze_stmt): Do not skip clobbers in early pass.
> > * ipa-pure-const.c (analyze_stmt): Update comment.
> >
> 
> After commit g:67a5c215940f4b21bac1aa489ce1f2fb3d52a53a (r11-3468), I
> see a failure in fortran on armeb-linux-gnueabihf,
> it's still present after r11-3491 (g4383c595ce5cc6ef6bcb45d2c9caf43002afbc4f):
> FAIL: gfortran.dg/PR94022.f90   -Os  execution test
> with GCC configured with:
> --target armeb-none-linux-gnueabihf
> --with-mode arm
> --with-cpu cortex-a9
> --with-fpu neon-fp16
> 
> Can you have a look?
I will take a look.
BTW on smaller testcases modref misoptimizations are generally not hard
to debug.  Compile with -fdump-tree-all-details and grep for modref.
It prints when it disambiguates and then it is usually easy to find the
problem (a reason why disambiguation is wrong).

For Fortran my guess is another frontend bug concerning the array
descriptor, as discussed here:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554936.html
It usually shows up as an optimized-out initialization of the array
descriptor just before the call.

Honza
> 
> > diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> > index 73a7900883a..728c6c1523d 100644
> > --- a/gcc/ipa-modref.c
> > +++ b/gcc/ipa-modref.c
> > @@ -676,13 +676,16 @@ static bool
> >  analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa,
> >   vec  *recursive_calls)
> >  {
> > -  /* There is no need to record clobbers.  */
> > -  if (gimple_clobber_p (stmt))
> > +  /* In general we can not ignore clobbers because they are barriers for
> > + code motion, however after inlining it is safe to do because local
> > + optimization passes do not consider clobbers from other functions.
> > + Similar logic is in ipa-pure-const.  */
> > +  if ((ipa || cfun->after_inlining) && gimple_clobber_p (stmt))
> >  return true;
> > +
> >/* Analyze all loads and stores in STMT.  */
> >walk_stmt_load_store_ops (stmt, summary,
> > analyze_load, analyze_store);
> > -  /* or call analyze_load_ipa, analyze_store_ipa */
> >
> >switch (gimple_code (stmt))
> > {
> > @@ -705,7 +708,7 @@ analyze_stmt (modref_summary *summary, gimple *stmt, 
> > bool ipa,
> > }
> >  }
> >
> > -/* Analyze function F.  IPA indicates whether we're running in tree mode 
> > (false)
> > +/* Analyze function F.  IPA indicates whether we're running in local mode 
> > (false)
> > or the IPA mode (true).  */
> >
> >  static void
> > diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
> > index bdbccd010dc..1af3206056e 100644
> > --- a/gcc/ipa-pure-const.c
> > +++ b/gcc/ipa-pure-const.c
> > @@ -742,6 +742,8 @@ check_stmt (gimple_stmt_iterator *gsip, funct_state 
> > local, bool ipa)
> >/* Do consider clobber as side effects before IPA, so we rather 

Re: [PATCH] irange_pool class

2020-09-28 Thread Andrew MacLeod via Gcc-patches

On 9/18/20 1:03 PM, Aldy Hernandez wrote:

On 9/18/20 6:42 PM, Andrew MacLeod wrote:
On 9/18/20 8:28 AM, David Malcolm wrote:
I think of a "pool allocator" as something that makes a small
number of large allocations under the covers, and then uses that
to serve large numbers of fixed-size small allocations and
deallocations with O(1) using a free list.

Ah, I didn't know pool had a different meaning.

See e.g. gcc/alloc-pool.h


The name originated when the original v1 version was based on using
alloc-pool.h.  When we went to varying sizes, we switched to an
obstack implementation and never changed the name.

  <...>


I think it would be clearer to name this "irange_obstack", or
somesuch.

I'd prefer something more generic.  We don't want to tie the name of
the allocator to the underlying implementation.  What if we later
change to malloc?  We'd have to change the name to irange_malloc.
irange_allocator?  Or is there something more generically appropriate
here?

How about "irange_bump_allocator?"   Rather long, but it expresses the




"irange_allocator" is sufficient .      The consumer should not care 
what the implementation is, and we may decide to implement it 
differently down the road. So I don't want to imply something 
specific in the name or we'd have to change it again.


Updated patch attached.

Aldy


This is OK btw,

Andrew
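
(For reference, a usage sketch of the allocator under the agreed name;
the interface is assumed from the patch discussion and "tmp" is
illustrative:)

  irange_allocator alloc;               // releases everything on destruction
  irange *r = alloc.allocate (2);       // an irange with room for 2 sub-ranges
  int_range_max tmp;
  tmp.set_varying (integer_type_node);
  irange *copy = alloc.allocate (tmp);  // sized to exactly fit tmp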



Re: [PATCH] generalized range_query class for multiple contexts

2020-09-28 Thread Andrew MacLeod via Gcc-patches

On 9/28/20 11:27 AM, Martin Sebor wrote:

On 9/25/20 11:41 AM, Andrew MacLeod wrote:




Since you have replied to this thread, what's your opinion on whether
there should be an extra API entry point for range/value_after_stmt,
or whether that should be rolled into the range_of_stmt routine, with
any ssa-name specified giving its range after the stmt...  perhaps
renaming it to something like "range_post_stmt", where if no ssa-name
is specified, it applies to the stmt result.


I prefer API parameterization over distinct names.  It makes APIs
easier for me to reason about, and when implementing them, often
lets me avoid duplicating code between them.  A good rule of thumb is
to ask if it might ever make sense to defer the decision to call
one or the other alternative until runtime.  If the answer isn't
an unequivocal "no" then a single entry point with a parameter is
probably a better choice.

By the way, your question about API design makes me think about
something else.  A range is a generalization of a singleton value
(it's a collection of singletons, which is particularly true with
the new multi-range design).  Conversely, a singleton value is
a specialization of a range.  It always makes sense to ask for
the range of an expression, even if it evaluates to a constant.
It only makes sense to ask for its value if it's known to evaluate
to a constant.  So if it's expected to be useful to make
a distinction between the two concepts in the type system I would
think it natural to model the relationship by having value_query
derive from range_query rather than the other way around.
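
Concretely, that relationship could be sketched like so (illustrative
only, assuming the irange/int_range_max types from value-range.h):

  class range_query
  {
  public:
    virtual bool range_of_expr (irange &r, tree expr, gimple * = NULL) = 0;
  };

  class value_query : public range_query
  {
  public:
    // A value is the degenerate case of a range: return one only when
    // the computed range collapses to a singleton.
    tree value_of_expr (tree expr, gimple *stmt = NULL)
    {
      int_range_max r;
      tree val;
      if (range_of_expr (r, expr, stmt) && r.singleton_p (&val))
        return val;
      return NULL_TREE;
    }
  };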



We've considered a single class for queries rather than the existing 
derived structure, but it all depends on your point of view.


You can have values without knowing about ranges, but you can't have 
ranges without knowing about values.  It's perspective, so there isn't
really a "right" answer.  Either can be implemented in terms of the other.


Lots of places in the compiler deal with value callbacks, only a few 
know or care about ranges, and those usually care about both ranges and 
values... thus the current structure.


Regardless, it's a starting point and easy to combine into a single class
later should that be worthwhile.


Andrew







Re: [PATCH 4/4] libstdc++: Rearrange some range adaptors' data members

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 28/09/20 09:11 -0400, Patrick Palka via Libstdc++ wrote:

On Mon, 28 Sep 2020, Jonathan Wakely wrote:


On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:
> Since the standard range adaptors are specified to derive from the empty
> class view_base, making their first data member store the underlying
> view is suboptimal, for if the underlying view also derives from
> view_base then the two view_base subobjects will be adjacent, thus
> preventing the compiler from applying the empty base optimization to
> elide away the storage for these two empty bases.
>
> This patch improves the situation by declaring the _M_base data member
> last instead of first in each range adaptor that has more than one data
> member, so that the empty base optimization can apply more often.
>
> Tested on x86_64-pc-linux-gnu with and without -m32.
>
> libstdc++-v3/ChangeLog:
>
>* include/std/ranges (filter_view::_M_base): Declare this data
>member last.
>(transform_view::_M_base): Likewise.
>(take_view::_M_base): Likewise.
>(take_while_view::_M_base): Likewise.
>(drop_view::_M_base): Likewise.
>(drop_while_view::_M_base): Likewise.
>(join_view::_M_base): Likewise.
>(split_view::_M_base): Likewise.
>* testsuite/std/ranges/adaptors/sizeof.cc: Adjust expected
>sizes.
> ---
> libstdc++-v3/include/std/ranges| 17 -
> .../testsuite/std/ranges/adaptors/sizeof.cc| 18 +-
> 2 files changed, 17 insertions(+), 18 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/ranges
> b/libstdc++-v3/include/std/ranges
> index 964a2b616a6..6fd8a85c2bf 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -1250,9 +1250,9 @@ namespace views
>{ return __y.__equal(__x); }
>   };
>
> -  _Vp _M_base = _Vp();
>   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
>   [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
> +  _Vp _M_base = _Vp();

The constructor's mem-initializer-list needs to be reordered, to avoid
warnings with -Wsystem-headers.


Oops, fixed thusly.  I noticed meanwhile that I forgot to move
reverse_view's _M_base member, so the below patch adds that as well.


Nice to see testsuite/std/ranges/adaptors/sizeof.cc already proving
useful by demonstrating the reduced sizes.
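
For illustration, a minimal layout sketch (not from the patch) of why
the member order matters:

  struct view_base { };
  struct inner_view : view_base { int i; };  // an underlying view

  // _M_base first: inner_view's view_base subobject cannot share the
  // address of the adaptor's own view_base at offset 0, forcing padding.
  struct base_first : view_base { inner_view b; int extra; };

  // _M_base last: the empty base overlaps 'extra' instead, so no padding.
  struct base_last : view_base { int extra; inner_view b; };

  // With a typical Itanium-ABI layout: sizeof (base_first) == 12 while
  // sizeof (base_last) == 8.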


Tested on x86_64-pc-linux-gnu, and also verified that there are no new
-Wsystem-headers warnings.


OK, thanks.



Re: Fix handling of gimple_clobber in ipa_modref

2020-09-28 Thread Christophe Lyon via Gcc-patches
Hi Honza,


On Sat, 26 Sep 2020 at 22:03, Jan Hubicka  wrote:
>
> > On September 26, 2020 12:04:24 AM GMT+02:00, Jan Hubicka  
> > wrote:
> > >Hi,
> > >while adding check for gimple_clobber I reversed the return value
> > >so instead of ignoring the statement ipa-modref gives up.  Fixed thus.
> > >This explains the drop between originally reported disambiguations
> > >stats and ones I got later.
> >
> > I don't think you can ignore clobbers. They are barriers for code motion.
>
> Hi,
> this is fix I have installed after lto-bootstrapping/regtesting.
> The statistics for cc1plus are almost unchanged that is sort of expected
> given that I only measure late optimization by getting dump from LTO.
>
> Thanks for pointing this out, it may have triggered a nasty wrong code
> bug :)
>
> Honza
>
> Alias oracle query stats:
>   refs_may_alias_p: 63013346 disambiguations, 73204989 queries
>   ref_maybe_used_by_call_p: 141350 disambiguations, 63909728 queries
>   call_may_clobber_ref_p: 23597 disambiguations, 29430 queries
>   nonoverlapping_component_refs_p: 0 disambiguations, 37763 queries
>   nonoverlapping_refs_since_match_p: 19444 disambiguations, 55671 must 
> overlaps, 75884 queries
>   aliasing_component_refs_p: 54749 disambiguations, 753947 queries
>   TBAA oracle: 24159888 disambiguations 56277876 queries
>16064485 are in alias set 0
>10340953 queries asked about the same object
>125 queries asked about the same alias set
>0 access volatile
>3920604 are dependent in the DAG
>1791821 are artificially in conflict with void *
>
> Modref stats:
>   modref use: 10444 disambiguations, 46994 queries
>   modref clobber: 1421468 disambiguations, 1954304 queries
>   4907798 tbaa queries (2.511277 per modref query)
>   396785 base compares (0.203031 per modref query)
>
> PTA query stats:
>   pt_solution_includes: 976073 disambiguations, 13607833 queries
>   pt_solutions_intersect: 1026016 disambiguations, 13185678 queries
>
> * ipa-modref.c (analyze_stmt): Do not skip clobbers in early pass.
> * ipa-pure-const.c (analyze_stmt): Update comment.
>

After commit g:67a5c215940f4b21bac1aa489ce1f2fb3d52a53a (r11-3468), I
see a failure in fortran on armeb-linux-gnueabihf,
it's still present after r11-3491 (g4383c595ce5cc6ef6bcb45d2c9caf43002afbc4f):
FAIL: gfortran.dg/PR94022.f90   -Os  execution test
with GCC configured with:
--target armeb-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a9
--with-fpu neon-fp16

Can you have a look?

> diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> index 73a7900883a..728c6c1523d 100644
> --- a/gcc/ipa-modref.c
> +++ b/gcc/ipa-modref.c
> @@ -676,13 +676,16 @@ static bool
>  analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa,
>   vec  *recursive_calls)
>  {
> -  /* There is no need to record clobbers.  */
> -  if (gimple_clobber_p (stmt))
> +  /* In general we can not ignore clobbers because they are barriers for code
> + motion, however after inlining it is safe to do because local
> + optimization passes do not consider clobbers from other functions.
> + Similar logic is in ipa-pure-const.  */
> +  if ((ipa || cfun->after_inlining) && gimple_clobber_p (stmt))
>  return true;
> +
>/* Analyze all loads and stores in STMT.  */
>walk_stmt_load_store_ops (stmt, summary,
> analyze_load, analyze_store);
> -  /* or call analyze_load_ipa, analyze_store_ipa */
>
>switch (gimple_code (stmt))
> {
> @@ -705,7 +708,7 @@ analyze_stmt (modref_summary *summary, gimple *stmt, bool 
> ipa,
> }
>  }
>
> -/* Analyze function F.  IPA indicates whether we're running in tree mode 
> (false)
> +/* Analyze function F.  IPA indicates whether we're running in local mode 
> (false)
> or the IPA mode (true).  */
>
>  static void
> diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
> index bdbccd010dc..1af3206056e 100644
> --- a/gcc/ipa-pure-const.c
> +++ b/gcc/ipa-pure-const.c
> @@ -742,6 +742,8 @@ check_stmt (gimple_stmt_iterator *gsip, funct_state 
> local, bool ipa)
>/* Do consider clobber as side effects before IPA, so we rather inline
>   C++ destructors and keep clobber semantics than eliminate them.
>
> + Similar logic is in ipa-modref.
> +
>   TODO: We may get smarter during early optimizations on these and let
>   functions containing only clobbers to be optimized more.  This is a 
> common
>   case of C++ destructors.  */


Re: [PATCH] generalized range_query class for multiple contexts

2020-09-28 Thread Martin Sebor via Gcc-patches

On 9/25/20 11:41 AM, Andrew MacLeod wrote:

On 9/23/20 7:53 PM, Martin Sebor via Gcc-patches wrote:

On 9/18/20 12:38 PM, Aldy Hernandez via Gcc-patches wrote:
As part of the ranger work, we have been trying to clean up and 
generalize interfaces whenever possible. This not only helps in 
reducing the maintenance burden going forward, but provides 
mechanisms for backwards compatibility between ranger and other 
providers/users of ranges throughout the compiler like evrp and VRP.


One such interface is the range_query class in vr_values.h, which 
provides a range query mechanism for use in the simplify_using_ranges 
module.  With it, simplify_using_ranges can be used with the ranger, 
or the VRP twins by providing a get_value_range() method.  This has 
helped us in comparing apples to apples while doing our work, and has 
also future proofed the interface so that asking for a range can be 
done within the context in which it appeared.  For example, 
get_value_range now takes a gimple statement which provides context. 
We are no longer tied to asking for a global SSA range, but can ask 
for the range of an SSA within a statement. Granted, this 
functionality is currently only in the ranger, but evrp/vrp could be 
adapted to pass such context.


The range_query is a good first step, but what we really want is a 
generic query mechanism that can ask for SSA ranges within an 
expression, a statement, an edge, or anything else that may come up. 
We think that a generic mechanism can be used not only for range 
producers, but consumers such as the substitute_and_fold_engine (see 
get_value virtual) and possibly the gimple folder (see valueize).


The attached patchset provides such an interface.  It is meant to be 
a replacement for range_query that can be used for vr_values, 
substitute_and_fold, the subsitute_and_fold_engine, as well as the 
ranger.  The general API is:


class value_query
{
public:
   // Return the singleton expression for NAME at a gimple statement,
   // or NULL if none found.
   virtual tree value_of_expr (tree name, gimple * = NULL) = 0;
   // Return the singleton expression for NAME at an edge, or NULL if
   // none found.
   virtual tree value_on_edge (edge, tree name);
   // Return the singleton expression for the LHS of a gimple
   // statement, assuming an (optional) initial value of NAME. Returns
   // NULL if none found.
   //
   // Note this method calculates the range the LHS would have *after*
   // the statement has executed.
   virtual tree value_of_stmt (gimple *, tree name = NULL);
};

class range_query : public value_query
{
public:
   range_query ();
   virtual ~range_query ();

   virtual tree value_of_expr (tree name, gimple * = NULL) OVERRIDE;
   virtual tree value_on_edge (edge, tree name) OVERRIDE;
   virtual tree value_of_stmt (gimple *, tree name = NULL) OVERRIDE;

   // These are the range equivalents of the value_* methods. Instead
   // of returning a singleton, they calculate a range and return it in
   // R.  TRUE is returned on success or FALSE if no range was found.
   virtual bool range_of_expr (irange &r, tree name, gimple * = NULL) = 0;

   virtual bool range_on_edge (irange &r, edge, tree name);
   virtual bool range_of_stmt (irange &r, gimple *, tree name = NULL);

   // DEPRECATED: This method is used from vr-values.  The plan is to
   // rewrite all uses of it to the above API.
   virtual const class value_range_equiv *get_value_range (const_tree,
   gimple * = NULL);
};

The duality of the API (value_of_* and range_on_*) is because some 
passes are interested in a singleton value 
(substitute_and_fold_engine), while others are interested in ranges 
(vr_values).  Passes that are only interested in singletons can take 
a value_query, while passes that are interested in full ranges can 
take a range_query.  Of course, for future proofing, we would 
recommend taking a range_query, since if you provide a default 
range_of_expr, sensible defaults will be provided for the others in 
terms of range_of_expr.


Note, that the absolute bare minimum that must be provided is a 
value_of_expr and a range_of_expr respectively.
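
For instance, a bare-bones range_query provider could look like this
(a sketch against the API above; the class name is made up):

  class cst_query : public range_query
  {
  public:
    virtual bool range_of_expr (irange &r, tree expr, gimple * = NULL) OVERRIDE
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        {
          r.set (expr, expr);  // singleton range [expr, expr]
          return true;
        }
      return false;            // no information; the defaults do the rest
    }
  };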


One piece of the API which is missing is a method  to return the 
range of an arbitrary SSA_NAME *after* a statement.  Currently 
range_of_expr calculates the range of an expression upon entry to the 
statement, whereas range_of_stmt calculates the range of *only* the 
LHS of a statement AFTER the statement has executed.


This would allow for complete representation of the ranges/values in 
something like:


 d_4 = *g_7;

Here the range of g_7 upon entry could be VARYING, but after the 
dereference we know it must be non-zero.  Well for sane targets anyhow.


Choices would be to:

   1) add a 4th method such as "range_after_stmt", or

   2) merge that functionality with the existing range_of_stmt method 
to provide "after" functionality for any ssa_name. Currently the 
SSA_NAME must be the same as the LHS if 

RE: [PATCH v2 3/16]middle-end Add basic SLP pattern matching scaffolding.

2020-09-28 Thread Tamar Christina
Hi Richi,

Thanks for the review! 

Just some answers to your questions:

> -Original Message-
> From: rguent...@c653.arch.suse.de  On
> Behalf Of Richard Biener
> Sent: Monday, September 28, 2020 1:37 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; o...@ucw.cz
> Subject: Re: [PATCH v2 3/16]middle-end Add basic SLP pattern matching
> scaffolding.
> 
> On Fri, 25 Sep 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This patch adds the basic infrastructure for doing pattern matching on SLP
> trees.
> > This is done immediately after the SLP tree creation because it can
> > change the shape of the tree in radical ways and so we would like to
> > do it before any analysis is performed on the tree.
> >
> > A new file tree-vect-slp-patterns.c is added which contains all the
> > code for pattern matching on SLP trees.
> >
> > This cover letter is short because the changes are heavily commented.
> >
> > All pattern matchers need to implement the abstract type
> VectPatternMatch.
> > The VectSimplePatternMatch abstract class provides some default
> > functionality for pattern matchers that need to rebuild nodes.
> >
> > The pattern matcher requires if replacing a statement in a node, that
> > ALL statements be replaced.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> 
> +gcall *build ()
> +{
> +  stmt_vec_info stmt_info;
> +
> 
> please define functions out-of-line (apart from the 1-liners)
> 
> +  /* We have to explicitly mark the old statement as unused because during
> +statement analysis the original and new pattern statement may require
> +different levels of unrolling.  As an example add/sub when vectorized
> +without a pattern requires 4 copies, whereas with a COMPLEX_ADD pattern
> +this only requires 2 copies and the two statements will be treated as
> +hand unrolled.  That means that the analysis won't happen as it'll find
> +a mismatch.  So we don't analyze the old statement and if we end up
> +needing it, e.g. SLP fails then we have to quickly re-analyze it.  */
> +  STMT_VINFO_RELEVANT (stmt_info) = vect_unused_in_scope;
> +  STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true;
> +  STMT_VINFO_RELATED_STMT (call_stmt_info) = stmt_info;
> 
> so this means all uses have to be inside the pattern as otherwise there may
> be even non-SLP uses.  vect_mark_pattern_stmts supports detecting
> patterns of patterns, I suppose the two-phase analysis for SLP patterns does
> not support this right now?
> 
> +  SLP_TREE_CODE (this->m_node) = gimple_expr_code (call_stmt);;
> 
> double ;, just make it CALL_EXPR literally (or leave it ERROR_MARK)
> 
> You seem to do in-place changing of the SLP node you match off?

Yes since this would allow me to change the root node as well, though
thinking about it I can probably do it by passing it as a reference which
then would allow me to re-use vect_create_new_slp_node which is
probably preferable. 

> 
> @@ -2192,6 +2378,17 @@ vect_analyze_slp_instance (vec_info *vinfo,
>   _size, bst_map);
>if (node != NULL)
>  {
> +  /* Temporarily allow add_stmt calls again.  */
> +  vinfo->stmt_vec_info_ro = false;
> +
> +  /* See if any patterns can be found in the constructed SLP tree
> +before we do any analysis on it.  */
> +  vect_match_slp_patterns (node, vinfo, group_size, &max_nunits,
> +  matches, &npermutes, &tree_size,
> + bst_map);
> +
> +  /* After this no more add_stmt calls are allowed.  */
> +  vinfo->stmt_vec_info_ro = true;
> +
> 
> I think this is a bit early to match patterns - I'd defer it to the point 
> where all
> entries into the same SLP subgraph are analyzed, thus somewhere at the
> end of vect_analyze_slp loop over all instances and match patterns?  That
> way phases are more clearly separated.

That would probably work, my only worry is that the SLP analysis itself may 
fail and
bail out at 

  /* If the loads and stores can be handled with load/store-lane
 instructions do not generate this SLP instance.  */
  if (is_a <loop_vec_info> (vinfo)
  && loads_permuted
  && dr && vect_store_lanes_supported (vectype, group_size, false))

Which in the initial tree may be true, but in the patterned tree may not be.  
In the previous
revision of the patch you had suggested I return a boolean which can be used to 
cancel such
checks.  Would that be the preferred approach?

> 
> Note that fiddling with vinfo->stmt_vec_info_ro is a bit ugly, maybe add a -
> >add_pattern_stmt (gimple *pattern_stmt, stmt_vec_info
> orig_stmt) variant that also sets STMT_VINFO_RELATED_STMT but doesn't
> check !stmt_vec_info_ro.  That could be used from tree-vect-patterns.c as
> well and we could set stmt_vec_info_ro earlier.
> 
> +  VectPattern *pattern = patt_fn (node, vinfo);  uint8_t n =
> + 

Re: [PATCH] amdgcn, nvptx: Disable OMP barriers in nested teams

2020-09-28 Thread Tom de Vries
On 9/28/20 4:17 PM, Andrew Stubbs wrote:
> On 28/09/2020 15:02, Tom de Vries wrote:
>>> This patch simply skips barriers when they would "wait" for only one
>>> thread (the current thread). This means that teams nested inside other
>>> teams now run independently, instead of strictly in lock-step, and is
>>> only valid as long as inner teams are limited to one thread each
>>> (currently the case).
>>
>> Is this inner-team-one-thread-limit coded or documented somewhere?
> 
> In libgomp/parallel.c, gomp_resolve_num_threads we have:
> 
>   else if (thr->ts.active_level >= 1 && !icv->nest_var)
>     return 1;
> 
>> If so, it might be good to add a comment there referring to the code
>> this patch adds.
> 
>   /* Accelerators with fixed thread counts require this to return 1 for
>  nested parallel regions.  */
> 
> WDYT?

Yep, looks good, thanks.
- Tom


Re: [PATCH] amdgcn, nvptx: Disable OMP barriers in nested teams

2020-09-28 Thread Andrew Stubbs

On 28/09/2020 15:02, Tom de Vries wrote:

This patch simply skips barriers when they would "wait" for only one
thread (the current thread). This means that teams nested inside other
teams now run independently, instead of strictly in lock-step, and is
only valid as long as inner teams are limited to one thread each
(currently the case).


Is this inner-team-one-thread-limit coded or documented somewhere?


In libgomp/parallel.c, gomp_resolve_num_threads we have:

  else if (thr->ts.active_level >= 1 && !icv->nest_var)
return 1;


If so, it might be good to add a comment there referring to the code
this patch adds.


  /* Accelerators with fixed thread counts require this to return 1 for
 nested parallel regions.  */

WDYT?

Andrew
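
(The resulting idea, as a sketch with hypothetical names, not the
actual patch:)

  /* Skip the barrier when only the current thread would arrive at it;
     nested inner teams are currently limited to one thread each.  */
  static inline void
  team_barrier_wait (unsigned nthreads)
  {
    if (nthreads == 1)
      return;             /* Nothing to synchronize with.  */
    /* ... issue the real bar.sync / s_barrier here ...  */
  }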


Re: dg-options after board/cflags

2020-09-28 Thread Jose E. Marchesi via Gcc-patches


>> Your patch dealt with board/multilib_flags, but the same problem exists
>> for board/cflags and many other flag-containing options.
>
>  What's the use case for that?  IIUC board flags are supposed to be ones 
> that are absolutely required for executables to run with a given board, 
> such as multilib selection, special linker scripts, non-standard run-time 
> library paths, etc.  These must not be overridden by test cases or they 
> will inevitably fail.

We want to build most bpf-unknown-none test files with -mxbpf, but not
all.

I wasn't aware of the "absolutely required" nature of the board
flags... then it is clear we need to find another strategy, like
specifying it in the individual test files/dg-options.
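
e.g. a per-test override would look like this (illustrative):

  /* { dg-do compile } */
  /* { dg-options "-mxbpf" } */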


Re: [PATCH] amdgcn, nvptx: Disable OMP barriers in nested teams

2020-09-28 Thread Tom de Vries
On 9/18/20 1:25 PM, Andrew Stubbs wrote:
> This patch fixes a problem in which nested OpenMP parallel regions cause
> errors if the number of inner teams is not balanced (i.e. the number of
> loop iterations is not divisible by the number of physical threads). A
> testcase is included.
> 
> On NVPTX the symptom was a fatal error:
> 
> libgomp: cuCtxSynchronize error: an illegal instruction was encountered
> 
> This was caused by mismatched "bar.sync" instructions (one waiting for
> 32 threads while another is waiting for 256). The source of the mismatch
> being that some threads were still busy while others had run out of work
> to do.
> 
> On GCN there was no such error (GCN barriers always wait for all
> threads), but it worked only by chance: the idle threads were "matching"
> different barriers to the busy threads, but it was harmless because the
> thread function pointer remained NULL.
> 
> This patch simply skips barriers when they would "wait" for only one
> thread (the current thread). This means that teams nested inside other
> teams now run independently, instead of strictly in lock-step, and is
> only valid as long as inner teams are limited to one thread each
> (currently the case).

Is this inner-team-one-thread-limit coded or documented somewhere?

If so, it might be good to add a comment there referring to the code
this patch adds.

Follow-up patch is OK, thanks.
- Tom

> When the inner regions exit then the barriers for
> the outer region will sync everything up again.
> 
> OK to commit?
> 
> Andrew
> 
> P.S. I can approve the amdgcn portion myself; I'm seeking approval for
> the nvptx portion.


Re: [PATCH] libstdc++: Add C++2a synchronization support

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 11/09/20 16:58 -0700, Thomas Rodgers wrote:

From: Thomas Rodgers 

This patch supersedes both the Add C++2a synchronization support patch
being replied to *and* the patch adding wait/notify_* to atomic_flag.

Add support for -
 * atomic_flag::wait/notify_one/notify_all
 * atomic::wait/notify_one/notify_all
 * counting_semaphore
 * binary_semaphore
 * latch
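
(A quick usage sketch of the new facilities; this is the standard
C++2a API, illustrative only:)

  #include <atomic>
  #include <latch>
  #include <semaphore>
  #include <thread>

  int main()
  {
    std::atomic<bool> ready{false};
    std::binary_semaphore token{0};
    std::latch done{1};

    std::thread t([&] {
      ready.wait(false);   // block until 'ready' becomes true
      token.release();     // hand a token back to main
      done.count_down();
    });

    ready = true;
    ready.notify_one();    // wake the waiter
    token.acquire();       // take the token
    done.wait();           // wait for the worker to arrive
    t.join();
  }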

libstdc++-v3/ChangeLog:

* include/Makefile.am (bits_headers): Add new header.
* include/Makefile.in: Regenerate.
* include/bits/atomic_base.h (__atomic_flag::wait): Define.
(__atomic_flag::notify_one): Likewise.
(__atomic_flag::notify_all): Likewise.
(__atomic_base<_Itp>::wait): Likewise.
(__atomic_base<_Itp>::notify_one): Likewise.
(__atomic_base<_Itp>::notify_all): Likewise.
(__atomic_base<_Ptp*>::wait): Likewise.
(__atomic_base<_Ptp*>::notify_one): Likewise.
(__atomic_base<_Ptp*>::notify_all): Likewise.
(__atomic_impl::wait): Likewise.
(__atomic_impl::notify_one): Likewise.
(__atomic_impl::notify_all): Likewise.
(__atomic_float<_Fp>::wait): Likewise.
(__atomic_float<_Fp>::notify_one): Likewise.
(__atomic_float<_Fp>::notify_all): Likewise.
(__atomic_ref<_Tp>::wait): Likewise.
(__atomic_ref<_Tp>::notify_one): Likewise.
(__atomic_ref<_Tp>::notify_all): Likewise.
(atomic_wait<_Tp>): Likewise.
(atomic_wait_explicit<_Tp>): Likewise.
(atomic_notify_one<_Tp>): Likewise.
(atomic_notify_all<_Tp>): Likewise.
* include/bits/atomic_wait.h: New file.
* include/bits/atomic_timed_wait.h: New file.
* include/bits/semaphore_base.h: New file.
* include/std/atomic (atomic::wait): Define.
(atomic::wait_one): Likewise.
(atomic::wait_all): Likewise.
(atomic<_Tp>::wait): Likewise.
(atomic<_Tp>::wait_one): Likewise.
(atomic<_Tp>::wait_all): Likewise.
(atomic<_Tp*>::wait): Likewise.
(atomic<_Tp*>::wait_one): Likewise.
(atomic<_Tp*>::wait_all): Likewise.
* include/std/latch: New file.
* include/std/semaphore: New file.
* include/std/version: Add __cpp_lib_semaphore and
__cpp_lib_latch defines.
* testsuite/29_atomic/atomic/wait_notify/atomic_refs.cc: New test.
* testsuite/29_atomic/atomic/wait_notify/bool.cc: Likewise.
* testsuite/29_atomic/atomic/wait_notify/integrals.cc: Likewise.
* testsuite/29_atomic/atomic/wait_notify/floats.cc: Likewise.
* testsuite/29_atomic/atomic/wait_notify/pointers.cc: Likewise.
* testsuite/29_atomic/atomic/wait_notify/generic.cc: Likewise.
* testsuite/29_atomic/atomic/wait_notify/generic.h: New File.
* testsuite/29_atomics/atomic_flag/wait_notify/1.cc: New test.
* testsuite/30_thread/semaphore/1.cc: New test.
* testsuite/30_thread/semaphore/2.cc: Likewise.
* testsuite/30_thread/semaphore/least_max_value_neg.cc: Likewise.
* testsuite/30_thread/semaphore/try_acquire.cc: Likewise.
* testsuite/30_thread/semaphore/try_acquire_for.cc: Likewise.
* testsuite/30_thread/semaphore/try_acquire_posix.cc: Likewise.
* testsuite/30_thread/semaphore/try_acquire_until.cc: Likewise.
* testsuite/30_thread/latch/1.cc: New test.
* testsuite/30_thread/latch/2.cc: New test.
* testsuite/30_thread/latch/3.cc: New test.
---
libstdc++-v3/include/Makefile.am  |   5 +
libstdc++-v3/include/Makefile.in  |   5 +
libstdc++-v3/include/bits/atomic_base.h   | 195 +++-
libstdc++-v3/include/bits/atomic_timed_wait.h | 281 
libstdc++-v3/include/bits/atomic_wait.h   | 301 ++
libstdc++-v3/include/bits/semaphore_base.h| 283 
libstdc++-v3/include/std/atomic   |  73 +
libstdc++-v3/include/std/latch|  90 ++
libstdc++-v3/include/std/semaphore|  92 ++
libstdc++-v3/include/std/version  |   2 +
.../atomic/wait_notify/atomic_refs.cc | 103 ++
.../29_atomics/atomic/wait_notify/bool.cc |  59 
.../29_atomics/atomic/wait_notify/floats.cc   |  32 ++
.../29_atomics/atomic/wait_notify/generic.cc  |  31 ++
.../29_atomics/atomic/wait_notify/generic.h   | 160 ++
.../atomic/wait_notify/integrals.cc   |  65 
.../29_atomics/atomic/wait_notify/pointers.cc |  59 
.../29_atomics/atomic_flag/wait_notify/1.cc   |  61 
libstdc++-v3/testsuite/30_threads/latch/1.cc  |  27 ++
libstdc++-v3/testsuite/30_threads/latch/2.cc  |  27 ++
libstdc++-v3/testsuite/30_threads/latch/3.cc  |  50 +++
.../testsuite/30_threads/semaphore/1.cc   |  27 ++
.../testsuite/30_threads/semaphore/2.cc   |  27 ++
.../semaphore/least_max_value_neg.cc  |  30 ++
.../30_threads/semaphore/try_acquire.cc   |  55 
.../30_threads/semaphore/try_acquire_for.cc   |  85 +

RE: [PATCH v2 9/16][docs] Add some missing test directive documentation.

2020-09-28 Thread Tamar Christina
Hi Sandra,

> -Original Message-
> From: Sandra Loosemore 
> Sent: Friday, September 25, 2020 10:35 PM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; jos...@codesourcery.com
> Subject: Re: [PATCH v2 9/16][docs] Add some missing test directive
> documentation.
> 
> On 9/25/20 8:29 AM, Tamar Christina wrote:
> > Hi All,
> >
> > This adds some documentation for some test directives that are missing.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * doc/sourcebuild.texi (vect_complex_rot_,
> > arm_v8_3a_complex_neon_ok, arm_v8_3a_complex_neon_hw):
> New.
> >
> > diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index
> >
> 65b2e552b74becdbc5474ba5ac387a4a0296e341..3abd8f631cb0234076641e399
> f6f
> > 00768b38ebee 100644
> > --- a/gcc/doc/sourcebuild.texi
> > +++ b/gcc/doc/sourcebuild.texi
> > @@ -1671,6 +1671,10 @@ Target supports a vector dot-product of
> @code{signed short}.
> >  @item vect_udot_hi
> >  Target supports a vector dot-product of @code{unsigned short}.
> >
> > +@item vect_complex_rot_@var{n}
> > +Target supports a vector complex addition and complex fma of mode
> @var{N}.
> > +Possible values of @var{n} are @code{hf}, @code{sf}, @code{df}.
> > +
> 
> Well, "fma" isn't a word.  But looking at target-supports.exp, this 
> description
> doesn't match what's in the source code anyway; there it says this is for
> "vector complex addition with rotate", not fused multiply-add.
> 

I don't currently differentiate between the operations (as in supporting one
requires supporting the other), which I probably should.
But you're right, I'll make this match what's on the tin.

Regards,
Tamar
> 
> > +@item arm_v8_3a_complex_neon_hw
> > +ARM target supports executing complex arithmetic instructions from
> ARMv8.3-A.
> > +Some multilibs may be incompatible with these options.
> > +Implies arm_v8_3a_complex_neon_ok.
> > +
> 
> There should be @code markup on arm_v8_3a_complex_neon_ok at the
> end.  I noticed more existing instances of missing @code markup in similar
> language for other entries in this table; can you fix those at the same time, 
> or
> in a separate patch?  I consider fixing markup issues like that to be obvious
> (especially in internal documentation rather than the GCC user manual), so
> you can just check in fixes like that without waiting for review.
> 
> -Sandra
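
(For context, such selectors gate tests in the usual way, e.g.,
illustrative:)

  /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
  /* { dg-require-effective-target vect_complex_rot_sf } */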


Re: [PATCH v2 5/16]middle-end: Add shared machinery for matching patterns involving complex numbers.

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Tamar Christina wrote:

> Hi All,
> 
> This patch adds shared machinery for detecting patterns having to do with
> complex number operations.  The class ComplexPattern provides helpers for
> matching and ultimately undoing the permutation in the tree by rebuilding the
> graph.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

I think you want to change all this to not look at individual
stmts:

+vect_match_expression_p (slp_tree node, tree_code code, int base, int idx,
+stmt_vec_info *op1, stmt_vec_info *op2)
+{
+
+  vec<stmt_vec_info> scalar_stmts = SLP_TREE_SCALAR_STMTS (node);
+

_all_ lanes are supposed to match the operation in SLP_TREE_REPRESENTATIVE
there's no need to do any operation-matching on lane granularity.

The only thing making a difference here is VEC_PERM_EXPR nodes where
again - there's no need to look at (eventually non-existant!)
SLP_TREE_SCALAR_STMTS but its SLP_TREE_REPRESENTATIVE.
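
A sketch of lane-free matching on the representative (illustrative;
accessors as in tree-vectorizer.h):

  static bool
  repr_matches_code_p (slp_tree node, tree_code code)
  {
    stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
    gassign *stmt = dyn_cast <gassign *> (rep->stmt);
    return stmt && gimple_assign_rhs_code (stmt) == code;
  }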

+  vec<slp_tree> children = SLP_TREE_CHILDREN (node);
+
+  /* If it's a VEC_PERM_EXPR we need to look one deeper.  VEC_PERM_EXPR
+only have one entry.  So pick one.  */
+  if (node->code == VEC_PERM_EXPR)
+   children = SLP_TREE_CHILDREN (children.last ());

that's too simplistic ;)

+ *op1 = SLP_TREE_SCALAR_STMTS (children[0])[n];

please make op1,op2 slp_tree, not stmt_vec_info.

Where do you look at SLP_TREE_LANE_PERMUTATION at all?  I think
the result of

+vect_detect_pair_op (int base, slp_tree node1, int offset1, slp_tree node2,
+int offset2, vec *ops)

could be simply the SLP_TREE_LANE_PERMUTATION? plus its two
child nodes?

In the ComplexAddPattern patch I see

+  /* Correct the arguments after matching.  */
+  std::swap (this->m_vects[1], this->m_vects[3]);

how's that necessary?  The replacement SLP node should end up
with just a SLP_TREE_REPRESENTATIVE (the IFN function call).
That is, the only thing necessary is verification / matching of the 
appropriate "interleaving" scheme imposed by SLP_TREE_LANE_PERMUTATION.
I guess things would go wrong if instead of effectively blending
two vectors we'd interleave two smaller vector type vectors?  Say
a 4-way add and a 4-way sub interleaved into a 8-way addsub, using
V4SF and V8SF vectors?

Going to stop looking at the series at this point, one further note
is that all of the Complex*Patterns seem to share the initial
match and thus redundantly do a vect_detect_pair_op repeatedly
on each node for each pattern?  I wonder if it could be
commoned into a single pattern, they all seem to share
the initial mixed plus/minus, then we have the multiplication
on one or both operand cases.

Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-slp-patterns.c (complex_operation_t,class ComplexPattern):
>   New.
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer


Re: [PATCH 4/4] libstdc++: Rearrange some range adaptors' data members

2020-09-28 Thread Patrick Palka via Gcc-patches
On Mon, 28 Sep 2020, Jonathan Wakely wrote:

> On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:
> > Since the standard range adaptors are specified to derive from the empty
> > class view_base, making their first data member store the underlying
> > view is suboptimal, for if the underlying view also derives from
> > view_base then the two view_base subobjects will be adjacent, thus
> > preventing the compiler from applying the empty base optimization to
> > elide away the storage for these two empty bases.
> > 
> > This patch improves the situation by declaring the _M_base data member
> > last instead of first in each range adaptor that has more than one data
> > member, so that the empty base optimization can apply more often.
> > 
> > Tested on x86_64-pc-linux-gnu with and without -m32.
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > * include/std/ranges (filter_view::_M_base): Declare this data
> > member last.
> > (transform_view::_M_base): Likewise.
> > (take_view::_M_base): Likewise.
> > (take_while_view::_M_base): Likewise.
> > (drop_view::_M_base): Likewise.
> > (drop_while_view::_M_base): Likewise.
> > (join_view::_M_base): Likewise.
> > (split_view::_M_base): Likewise.
> > * testsuite/std/ranges/adaptors/sizeof.cc: Adjust expected
> > sizes.
> > ---
> > libstdc++-v3/include/std/ranges| 17 -
> > .../testsuite/std/ranges/adaptors/sizeof.cc| 18 +-
> > 2 files changed, 17 insertions(+), 18 deletions(-)
> > 
> > diff --git a/libstdc++-v3/include/std/ranges
> > b/libstdc++-v3/include/std/ranges
> > index 964a2b616a6..6fd8a85c2bf 100644
> > --- a/libstdc++-v3/include/std/ranges
> > +++ b/libstdc++-v3/include/std/ranges
> > @@ -1250,9 +1250,9 @@ namespace views
> > { return __y.__equal(__x); }
> >   };
> > 
> > -  _Vp _M_base = _Vp();
> >   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
> >   [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
> > +  _Vp _M_base = _Vp();
> 
> The constructor's mem-initializer-list needs to be reordered, to avoid
> warnings with -Wsystem-headers.

Oops, fixed thusly.  I noticed meanwhile that I forgot to move
reverse_view's _M_base member, so the below patch adds that as well.

Tested on x86_64-pc-linux-gnu, and also verified that there are no new
-Wsystem-headers warnings.

-- >8 --

libstdc++-v3/ChangeLog:

* include/std/ranges (filter_view): Declare data member _M_base
last instead of first, and adjust constructors' member
initializer lists accordingly.
(transform_view): Likewise.
(take_view): Likewise.
(take_while_view): Likewise.
(drop_view): Likewise.
(drop_while_view): Likewise.
(join_view): Likewise.
(split_view): Likewise (and tweak formatting of the declaration
of _M_current).
(reverse_view): Likewise.
* testsuite/std/ranges/adaptors/sizeof.cc: Update expected
sizes.
---
 libstdc++-v3/include/std/ranges   | 47 +--
 .../testsuite/std/ranges/adaptors/sizeof.cc   | 20 
 2 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 964a2b616a6..7fd5d5110ed 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1250,16 +1250,16 @@ namespace views
{ return __y.__equal(__x); }
   };
 
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
   [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
+  _Vp _M_base = _Vp();
 
 public:
   filter_view() = default;
 
   constexpr
   filter_view(_Vp __base, _Pred __pred)
-   : _M_base(std::move(__base)), _M_pred(std::move(__pred))
+   : _M_pred(std::move(__pred)), _M_base(std::move(__base))
   { }
 
   constexpr _Vp
@@ -1588,15 +1588,15 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Fp> _M_fun;
+  _Vp _M_base = _Vp();
 
 public:
   transform_view() = default;
 
   constexpr
   transform_view(_Vp __base, _Fp __fun)
-   : _M_base(std::move(__base)), _M_fun(std::move(__fun))
+   : _M_fun(std::move(__fun)), _M_base(std::move(__base))
   { }
 
   constexpr _Vp
@@ -1695,15 +1695,15 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
   range_difference_t<_Vp> _M_count = 0;
+  _Vp _M_base = _Vp();
 
 public:
   take_view() = default;
 
   constexpr
   take_view(_Vp base, range_difference_t<_Vp> __count)
-   : _M_base(std::move(base)), _M_count(std::move(__count))
+   : _M_count(std::move(__count)), _M_base(std::move(base))
   { }
 
   constexpr _Vp
@@ -1842,17 +1842,16 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp 

Re: Ping [PATCH] RX add control register PC

2020-09-28 Thread Darius Galis

Hello,

Could you please let me know the status of this patch?

Thank you,

Darius Galis

On 26-Aug-20 1:24 PM, Darius Galis wrote:

Hello,
Thank you for adjusting the patch.
I don't have commit privileges, so if you could please commit it, that would be 
great.
Best regards,
Darius Galis
On 25-Aug-20 10:48 PM, Jeff Law wrote:

On Tue, 2020-08-18 at 16:05 +0300, Darius Galis wrote:

Hello,

The following patch is adding the PC control register.
It also updates the __builtin_rx_mvtc() function, since
according to the documentation, the PC register cannot be specified
as dest.

The regression was tested with the following command:

make -k check-gcc RUNTESTFLAGS=--target_board=rx-sim

There were no additionals failures.

Please let me know if this is OK, Thank you!
Darius Galis

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b834a2c..3436c25 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2020-08-17  Darius Galis
+
+   * config/rx/rx.md (CTRLREG_PC): Add.
+   * config/rx/rx.c (CTRLREG_PC): Add
+   (rx_expand_builtin_mvtc): Add warning: PC register cannot
+   be used as dest.
+
   2020-08-03  Jonathan Wakely
   
   	* doc/cpp.texi (Variadic Macros): Use the exact ... token in

diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index 151ad39..1cc88d9 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -639,6 +639,7 @@ rx_print_operand (FILE * file, rtx op, int letter)
 switch (INTVAL (op))
{
case CTRLREG_PSW:   fprintf (file, "psw"); break;
+   case CTRLREG_PC:fprintf (file, "pc"); break;
case CTRLREG_USP:   fprintf (file, "usp"); break;
case CTRLREG_FPSW:  fprintf (file, "fpsw"); break;
case CTRLREG_BPSW:  fprintf (file, "bpsw"); break;
@@ -2474,6 +2475,14 @@ rx_expand_builtin_mvtc (tree exp)
 if (! REG_P (arg2))
   arg2 = force_reg (SImode, arg2);
   
+  if (INTVAL(arg1) == 1/*PC*/)

We generally avoid comments on the same line as code.  And there should be a
space before the open paren of a function or macro argument.  So:

/* PC */
if (INTVAL (arg1) == 1)

With that change, this is OK to commit.  I can't recall if you have commit privs
or not...  If not, I can commit for you.
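
(As a further sketch, not part of the review: since the patch defines
CTRLREG_PC, the magic literal could be dropped altogether:)

  if (INTVAL (arg1) == CTRLREG_PC)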


Thanks,
Jeff


--
Ing. Darius Galiș
Software Engineer - CyberTHOR Studios Ltd.


--
Ing. Darius Galiș
Software Engineer - CyberTHOR Studios Ltd.



Re: [PATCH v2 4/16]middle-end: Add dissolve code for when SLP fails and non-SLP loop vectorization is to be tried.

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Tamar Christina wrote:

> Hi All,
> 
> This adds the dissolve code to undo the patterns created by the pattern
> matcher in case SLP is to be aborted.
> 
> As mentioned in the cover letter this has one issue in that the number of
> copies needed can change depending on whether TWO_OPERATORS is needed or not.
> 
> Because of this I don't analyze the original statement when it's replaced by a
> pattern and attempt to correct it here by analyzing it after dissolve.
> 
> This however seems too late and I would need to change the unroll factor,
> which seems a bit odd.  Any advice would be appreciated.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ah, this is what you mean with the need to dissolve.  Yeah ...

@@ -2427,6 +2513,11 @@ again:
   /* Ensure that "ok" is false (with an opt_problem if dumping is 
enabled).  */
   gcc_assert (!ok);

+  /* Dissolve any SLP patterns created by the SLP pattern matcher.  */
+  opt_result dissolved = vect_dissolve_slp_only_patterns (loop_vinfo);
+  if (!dissolved)
+return dissolved;
+
   /* Try again with SLP forced off but if we didn't do any SLP there is
  no point in re-trying.  */
   if (!slp)

I think this only belongs after

  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
 "re-trying with SLP disabled\n");

  /* Roll back state appropriately.  No SLP this time.  */
  slp = false;

thus where everything else is "restored".  In fact I wonder if
it cannot be done as part of

  /* Reset SLP type to loop_vect on all stmts.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
{
  basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
...

?  In particular

+   /* Now we have to re-analyze the statement since we skipped it in
+      the initial analysis due to the differences in copies.  */
+   res = vect_analyze_stmt (loop_vinfo, related_stmt_info,
+                            &need_to_vectorize, NULL, NULL, &cost_vec);

looks unneeded since we're going to re-analyze all stmts anyway?

The thing is, there's no way to recover the original pattern stmt
in case your SLP pattern stmt was composed of "regular" pattern
stmts (the STMT_VINFO_RELATED_STMT simply gets replaced).  I
wonder if it would be easier to record the SLP pattern stmt
only in SLP_TREE_REPRESENTATIVE but _not_ in SLP_TREE_SCALAR_STMTS
(just leave those alone)?

Richard.


> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-loop.c (vect_dissolve_slp_only_patterns): New
>   (vect_dissolve_slp_only_groups): Call vect_dissolve_slp_only_patterns.
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer


Re: [PATCH v2] builtins: rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2020-09-28 Thread Raoni Fassina Firmino via Gcc-patches
Ping.


Re: [PATCH v2 3/16]middle-end Add basic SLP pattern matching scaffolding.

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Tamar Christina wrote:

> Hi All,
> 
> This patch adds the basic infrastructure for doing pattern matching on SLP 
> trees.
> This is done immediately after the SLP tree creation because it can change the
> shape of the tree in radical ways and so we would like to do it before any
> analysis is performed on the tree.
> 
> A new file tree-vect-slp-patterns.c is added which contains all the code for
> pattern matching on SLP trees.
> 
> This cover letter is short because the changes are heavily commented.
> 
> All pattern matchers need to implement the abstract type VectPatternMatch.
> The VectSimplePatternMatch abstract class provides some default functionality
> for pattern matchers that need to rebuild nodes.
> 
> The pattern matcher requires that, if replacing a statement in a node, ALL
> statements be replaced.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
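
For reference, a hypothetical sketch of the shape of that interface (the
names build/matches/get_arity appear in the review below; everything else
here is an assumption, not the actual patch):

  class VectPatternMatch
  {
  public:
    virtual ~VectPatternMatch () {}
    /* Number of scalar statements consumed per pattern instance.  */
    virtual uint8_t get_arity () const = 0;
    /* Whether the group of statements starting at index I matches.  */
    virtual bool matches (vec<stmt_vec_info> stmt_infos, unsigned i) = 0;
    /* Build the replacement pattern statement for a matched group.  */
    virtual gcall *build () = 0;
  };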

+gcall *build ()
+{
+  stmt_vec_info stmt_info;
+

please define functions out-of-line (apart from the 1-liners)

+  /* We have to explicitly mark the old statement as unused because during
+     statement analysis the original and new pattern statement may require
+     different levels of unrolling.  As an example add/sub when vectorized
+     without a pattern requires 4 copies, whereas with a COMPLEX_ADD pattern
+     this only requires 2 copies and the two statements will be treated as
+     hand unrolled.  That means that the analysis won't happen as it'll find
+     a mismatch.  So we don't analyze the old statement and if we end up
+     needing it, e.g. if SLP fails, then we have to quickly re-analyze it.  */
+  STMT_VINFO_RELEVANT (stmt_info) = vect_unused_in_scope;
+  STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true;
+  STMT_VINFO_RELATED_STMT (call_stmt_info) = stmt_info;

so this means all uses have to be inside the pattern as otherwise
there may be even non-SLP uses.  vect_mark_pattern_stmts supports
detecting patterns of patterns, I suppose the two-phase analysis
for SLP patterns does not support this right now?

+  SLP_TREE_CODE (this->m_node) = gimple_expr_code (call_stmt);;

double ;, just make it CALL_EXPR literally (or leave it ERROR_MARK)

You seem to do in-place changing of the SLP node you match off?

@@ -2192,6 +2378,17 @@ vect_analyze_slp_instance (vec_info *vinfo,
			     &tree_size, bst_map);
   if (node != NULL)
 {
+  /* Temporarily allow add_stmt calls again.  */
+  vinfo->stmt_vec_info_ro = false;
+
+  /* See if any patterns can be found in the constructed SLP tree
+before we do any analysis on it.  */
+  vect_match_slp_patterns (node, vinfo, group_size, &max_nunits,
+			   matches, &npermutes, &tree_size, bst_map);
+
+  /* After this no more add_stmt calls are allowed.  */
+  vinfo->stmt_vec_info_ro = true;
+

I think this is a bit early to match patterns - I'd defer it to the
point where all entries into the same SLP subgraph are analyzed,
thus somewhere at the end of vect_analyze_slp loop over all
instances and match patterns?  That way phases are more clearly
separated.

Note that fiddling with vinfo->stmt_vec_info_ro is a bit ugly,
maybe add a ->add_pattern_stmt (gimple *pattern_stmt, stmt_vec_info 
orig_stmt) variant that also sets STMT_VINFO_RELATED_STMT
but doesn't check !stmt_vec_info_ro.  That could be used from
tree-vect-patterns.c as well and we could set stmt_vec_info_ro
earlier.

+  VectPattern *pattern = patt_fn (node, vinfo);
+  uint8_t n = pattern->get_arity ();
+
+  if (group_size % n != 0)
+{
+  delete pattern;

seems to require VectPattern allocation even upon failure, I suggest
to return NULL then to avoid excessive allocations.

+  if (!pattern->matches (stmt_infos, i))
+   {
+	  /* We can only do replacements for entire groups, we must replace
+	     all statements in a node as the argument list/children may not
+	     have equal height then.  Operations that don't rewrite the
+	     arguments may be safe to do, so perhaps parametrise it.  */
+
+ found_p = false;

I find it a bit ugly to iterate over "unrolls" in the machinery
rather than the individual pattern matcher which might have an
easier and in particular cheaper job here.  Since you require
all lanes to match the same pattern anyway.   Not sure if your
later patches support say, mixing complex add with different
rotate in the same SLP node.  Note the ultimate idea in the end
is that a SLP node can, of course, be split into two [but at
this point the vector type / unroll factor is not final so
general splitting at vector boundary is not desired yet].
The split can be undone for consumers by inserting a
VEC_PERM node (which should semantically be a concat + select)

+  tree type = gimple_expr_type (STMT_VINFO_STMT (stmt_info));
+  tree vectype = get_vectype_for_scalar_type (vinfo, type, node);

use 

  

Re: [PATCH] Enable GCC support for AMX

2020-09-28 Thread Hongyu Wang via Gcc-patches
Thanks!  I'll ask my colleague to help check in the patch.

Kirill Yukhin  wrote on Monday, September 28, 2020 at 7:38 PM:

> Hello,
>
> On 12 Sep 01:00, Hongyu Wang wrote:
> > Hi
> >
> > Thanks for your review, and sorry for the late reply. It took a while
> > to finish the runtime test.
>
> Thanks for your fixes! The patch is OK for trunk.
>
> --
> Thanks, K
>


-- 
Regards,

Hongyu, Wang


Re: [PATCH v2 1/16]middle-end: Refactor refcnt to use SLP_TREE_REF_COUNT for consistency

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Tamar Christina wrote:

> Hi All,
> 
> This is a small refactoring which introduces SLP_TREE_REF_COUNT and replaces
> the uses of refcnt with it.  This is for consistency with the other
> properties.
> 
> A similar patch was pre-approved last year but since there are more uses now
> I am sending it for review anyway.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vectorizer.h (SLP_TREE_REF_COUNT): New.
>   * tree-vect-slp.c (_slp_tree::_slp_tree, _slp_tree::~_slp_tree,
>   vect_free_slp_tree, vect_build_slp_tree, vect_print_slp_tree,
>   slp_copy_subtree, vect_attempt_slp_rearrange_stmts): Use it.
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer


Re: [PATCH v2 0/16][RFC][AArch64/Arm/SVE/SVE2/MVE]middle-end Add support for SLP vectorization of complex number instructions.

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Tamar Christina wrote:

> Hi All,
> 
> This patch series adds support for SLP vectorization of complex
> instructions [1].
> 
> These instructions exist only in their vector forms and require you to
> recognize two statements in parallel.  Complex operations usually require a
> permute due to the fact that the real and imaginary numbers are stored
> intermixed, but these vector instructions expect this and no longer need
> the compiler to generate a permute.
> 
> For this reason the pass also re-orders the loads in the SLP tree such that
> they become contiguous and no longer need the permutes.  The Basic Blocks
> are left untouched such that the scalar loop will still correctly issue
> permutes.
> 
> The instructions also support rotations along the Argand plane; as such the
> operands have to be re-ordered to coincide with their load group.
> 
> For now, this patch only adds support for:
> 
>   * Complex Addition with rotation of 0 and 180.
>   * Complex Multiplication and Multiplication where one operand is conjugated.
>   * Complex FMA and FMA where one operand is conjugated.
>   * Complex FMS and FMS where one operand is conjugated.
>   
> Complex dot-product is not currently supported in this patch set as
> build_slp fails for it.  This will be provided as a future patch.
>   
> These are supported for both integer and floating point and as such these
> don't look for real or imaginary pairs but instead rely on the early
> lowering of complex numbers by GCC and canonicalization of the operations
> such that it just recognizes any instruction sequence matching the
> operations requested.
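
For illustration, a minimal input (not from the patch) whose lowered form
the matcher would recognize as a complex addition with rotation 0:

  void
  cadd (_Complex float *restrict c, _Complex float *restrict a,
        _Complex float *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      c[i] = a[i] + b[i];   /* lowered to paired real/imaginary adds over
                               interleaved elements */
  }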
> 
> To be safe, when it is not sure it can support the operation or if it
> finds something it does not understand, it backs off.
> 
> This patch is an RFC and I am looking for feedback on the approach.
> Particularly, this series has one problem, which arises when it is decided
> that SLP is not viable and that the normal loop vectorizer is to be used.
> 
> In this case I dissolve the changes but the compiler crashes because the use
> of the pattern matcher essentially undoes two_operands.  This means that the
> number of copies needed when using the patterns and when not are different.
> When using the patterns the two operands become the same and so are treated
> as manually unrolled loops.  The problem is that nunits has already been
> decided along with the unroll factor, so when the dissolved statements are
> then analyzed they fail.  This is also the reason why I cannot analyze both
> the pattern and original statements initially.

That's the same as with "regular" patterns btw.: if vectorizing the
pattern fails, vectorization fails; we never re-consider and we also
have no way to choose from multiple patterns.

The way "regular" patterns make this a non-issue is that they try
to only convert things that are likely unhandled/suboptimal and
most likely vectorizable.

That said - the solution to the ICE is to _not_ dissolve the changes and
instead make vectorization fail.

Richard.

> The relevant places in the source code have comments describing the problem.
> 
> [1] https://developer.arm.com/documentation/ddi0487/fc/
> 
> Thanks,
> Tamar


Re: [PATCH] Enable GCC support for AMX

2020-09-28 Thread Kirill Yukhin via Gcc-patches
Hello,

On 12 Sep 01:00, Hongyu Wang wrote:
> Hi
> 
> Thanks for your review, and sorry for the late reply. It took a while
> to finish the runtime test.

Thanks for your fixes! The patch is OK for trunk.

--
Thanks, K


Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Richard Biener
On Mon, 28 Sep 2020, Jan Hubicka wrote:

> > On Mon, 28 Sep 2020, Jan Hubicka wrote:
> > 
> > > > 
> > > > Hmm, no - it expects the gimple_call_use/clobber_set to include
> > > > actions of the called function itself (const functions none here)
> > > > but for passed-by-value the loads may be in the call itself
> > > > which is where it uses gimple_call_arg_flags to skip unused ones.
> > > > 
> > > > But note that PTA uses gimple_call_arg_flags to compute
> > > > the gimple_call_use/clobber_set (but only in IPA mode) and the 
> > > > individual 
> > > > BUILT_IN_* handlings in tree-ssa-alias.c do what PTA would do
> > > > here, just directly as alias queries.  Originally I tried adding
> > > > "fn spec"s to BUILT_IN_* decls but that got out-of-hands ...
> > > 
> > > Yep, I was also looking into that, it looks ugly and moreover the
> > > attributes that depend on flag_math_errno and friends do not really
> > > work for function-specific attributes, so it is a bug that we handle
> > > them this way.
> > > 
> > > Packing this into a descriptor string in fnspec rather than a descriptor
> > > structure returned on the side works for me (and we could add them to
> > > things like the C++ toplevel new/delete operators).
> > > > 
> > > > For example
> > > > 
> > > > /* All the following functions read memory pointed to by
> > > >their second argument.  strcat/strncat additionally
> > > >reads memory pointed to by the first argument.  */
> > > > case BUILT_IN_STRCAT:
> > > > case BUILT_IN_STRNCAT:
> > > >   {
> > > > ao_ref dref;
> > > > ao_ref_init_from_ptr_and_size (&dref,
> > > >                                gimple_call_arg (call, 0),
> > > >                                NULL_TREE);
> > > > if (refs_may_alias_p_1 (&dref, ref, false))
> > > >   return true;
> > > >   }
> > > > 
> > > > would be covered by "1WR" (W also implies read).  So the question
> > > > would be whether at least parts of your patch could be implemented
> > > > by making gimple_call_fnspec () return pre-determined fn-specs
> > > > for BUILT_IN_* (like we do with internal_fn_fnspec).
> > > 
> > > One important change would be that we still need to figure out if the
> > > function has other side effects.  Currently ref_maybe_used_by_call_p_1
> > > first tries to prove that ref is local and can not be used by the
> > > function unless it is passed as parameter value and then it looks into
> > > fnspec if the parameter is used.
> > > 
> > > For builtins it constructs tests based on the builtin type and bypasses
> > > the rest since it knows that all side effects are understood.
> > > 
> > > As far as I can tell, fn spec is used only by Fortran for library calls,
> > > so perhaps it would not be too hard to turn fn spec semantics from "some
> > > properties of parameter handling are known" to "all memory accesses are
> > > known" or have an additional way to indicate this.
> > 
> > Yeah, currently "fn spec" only says we know everything with respect
> > to a specific parameter but not the whole function.  We'd naturally
> > say the function is 'const' besides what is specified by "fn spec" but
> > of course we cannot really use 'const' for this now.
> > 
> > > I guess space in return value predictor could be reserved for this, or
> > > we could have an extra flags section at the beginning.
> > 
> > Yeah, I think we want pure/const/unknown, so p,c,u with
> > upper case denoting 'errno' is clobbered (eh...).
> 
> Sounds good to me
> > 
> > > > 
> > > > >  We would need
> > > > > to invent how to add info that a non pure/const function only accesses
> > > > > the argument according to fn spec, add a way to represent sizes of accesses
> > > > > and how to handle errno.
> > > > 
> > > > True - fn-spec lacks an ability to specify access size - fortunately
> > > > it's internal and we could change it ;)  Whether or not sth can clobber
> > > > errno is sth we eventually need to move to the stmt level anyway
> > > > (-f[no-]math-errno crossing inline boundary) - bit sad to waste a
> > > > bit in a gimple call for this though.  OTOH we waste a ton of space
> > > > in the call_used/call_clobbered members due to bad packing
> > > > (unsigned int bitfield followed by a pointer to a bitmap).  Eventually
> > > > we could offload this kind of info from the main stmt.  We could
> > > > also allocate a fake UID to represent errno (but then the rest of
> > > > the bits in call_used/call_clobbered have to be precise) - but note
> > > > call_used/call_clobbered does not transfer during inlining.
> > > > 
> > > > So for the access size issue we could blow up fnspec by factor of two,
> > > > reserving two chars per argument/return value and do
> > > > 
> > > >  1.W3R.
> > > 
> > > We also have known bytesizes like return values of sincos
> > > (where the size really depends on the size of a type that is known only at
> > > runtime).
> > > That can be 3 digits if represented 

Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Jan Hubicka
> On Mon, 28 Sep 2020, Jan Hubicka wrote:
> 
> > > 
> > > Hmm, no - it expects the gimple_call_use/clobber_set to include
> > > actions of the called function itself (const functions none here)
> > > but for passed-by-value the loads may be in the call itself
> > > which is where it uses gimple_call_arg_flags to skip unused ones.
> > > 
> > > But note that PTA uses gimple_call_arg_flags to compute
> > > the gimple_call_use/clobber_set (but only in IPA mode) and the individual 
> > > BUILT_IN_* handlings in tree-ssa-alias.c do what PTA would do
> > > here, just directly as alias queries.  Originally I tried adding
> > > "fn spec"s to BUILT_IN_* decls but that got out-of-hands ...
> > 
> > Yep, I was also looking into that, it looks ugly and moreover the
> > attributes that depend on flag_math_errno and friends do not really
> > work for function-specific attributes, so it is a bug that we handle
> > them this way.
> > 
> > Packing this into a descriptor string in fnspec rather than a descriptor
> > structure returned on the side works for me (and we could add them to
> > things like the C++ toplevel new/delete operators).
> > > 
> > > For example
> > > 
> > > /* All the following functions read memory pointed to by
> > >their second argument.  strcat/strncat additionally
> > >reads memory pointed to by the first argument.  */
> > > case BUILT_IN_STRCAT:
> > > case BUILT_IN_STRNCAT:
> > >   {
> > > ao_ref dref;
> > > ao_ref_init_from_ptr_and_size (&dref,
> > >                                gimple_call_arg (call, 0),
> > >                                NULL_TREE);
> > > if (refs_may_alias_p_1 (&dref, ref, false))
> > >   return true;
> > >   }
> > > 
> > > would be covered by "1WR" (W also implies read).  So the question
> > > would be whether at least parts of your patch could be implemented
> > > by making gimple_call_fnspec () return pre-determined fn-specs
> > > for BUILT_IN_* (like we do with internal_fn_fnspec).
> > 
> > One important change would be that we still need to figure out if the
> > function has other side effects.  Currently ref_maybe_used_by_call_p_1
> > first tries to prove that ref is local and can not be used by the
> > function unless it is passed as parameter value and then it looks into
> > fnspec if the parameter is used.
> > 
> > For builtins it constructs tests based on the builtin type and bypasses
> > the rest since it knows that all side effects are understood.
> > 
> > As far as I can tell, fn spec is used only by Fortran for library calls,
> > so perhaps it would not be too hard to turn fn spec semantics from "some
> > properties of parameter handling are known" to "all memory accesses are
> > known" or have an additional way to indicate this.
> 
> Yeah, currently "fn spec" only says we know everything with respect
> to a specific parameter but not the whole function.  We'd naturally
> say the function is 'const' besides what is specified by "fn spec" but
> of course we cannot really use 'const' for this now.
> 
> > I guess space in return value predictor could be reserved for this, or
> > we could have an extra flags section at the beginning.
> 
> Yeah, I think we want pure/const/unknown, so p,c,u with
> upper case denoting 'errno' is clobbered (eh...).

Sounds good to me
> 
> > > 
> > > >  We would need
> > > > to invent how to add info that a non pure/const function only accesses
> > > > the argument according to fn spec, add a way to represent sizes of accesses
> > > > and how to handle errno.
> > > 
> > > True - fn-spec lacks an ability to specify access size - fortunately
> > > it's internal and we could change it ;)  Whether or not sth can clobber
> > > errno is sth we eventually need to move to the stmt level anyway
> > > (-f[no-]math-errno crossing inline boundary) - bit sad to waste a
> > > bit in a gimple call for this though.  OTOH we waste a ton of space
> > > in the call_used/call_clobbered members due to bad packing
> > > (unsigned int bitfield followed by a pointer to a bitmap).  Eventually
> > > we could offload this kind of info from the main stmt.  We could
> > > also allocate a fake UID to represent errno (but then the rest of
> > > the bits in call_used/call_clobbered have to be precise) - but note
> > > call_used/call_clobbered does not transfer during inlining.
> > > 
> > > So for the access size issue we could blow up fnspec by factor of two,
> > > reserving two chars per argument/return value and do
> > > 
> > >  1.W3R.
> > 
> > We also have known bytesizes like return values of sincos
> > (where the size really depends on the size of a type that is known only at
> > runtime).
> > That can be 3 digits if represented decimally (or two digits hex, but
> > it may become too small in future with vector types)
> 
> Hmm, we can use Wt for 'formal argument type', so sincos 'double *'
> would mean 'double'?  Not sure if that's enough for all cases.

We currently handle 

Re: [PATCH V2] aarch64: Do not alter force_reg returned rtx expanding pauth builtins

2020-09-28 Thread Andrea Corallo
Richard Sandiford  writes:

> Andrea Corallo  writes:
>> Hi all,
>>
>> here is the reworked patch addressing Richard's suggestions.
>>
>> Regtested and bootstrapped on aarch64-linux-gnu.
>>
>> Okay for trunk?
>
> OK, thanks.
>
> Richard

Into trunk as 92f0d3d03a7.

Thanks!

  Andrea


Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Richard Biener
On Mon, 28 Sep 2020, Jan Hubicka wrote:

> > 
> > Hmm, no - it expects the gimple_call_use/clobber_set to include
> > actions of the called function itself (const functions none here)
> > but for passed-by-value the loads may be in the call itself
> > which is where it uses gimple_call_arg_flags to skip unused ones.
> > 
> > But note that PTA uses gimple_call_arg_flags to compute
> > the gimple_call_use/clobber_set (but only in IPA mode) and the individual 
> > BUILT_IN_* handlings in tree-ssa-alias.c do what PTA would do
> > here, just directly as alias queries.  Originally I tried adding
> > "fn spec"s to BUILT_IN_* decls but that got out-of-hands ...
> 
> Yep, I was also looking into that, it looks ugly and moreover the
> attributes that depend on flag_math_errno and friends do not really
> work for function-specific attributes, so it is a bug that we handle
> them this way.
> 
> Packing this into a descriptor string in fnspec rather than a descriptor
> structure returned on the side works for me (and we could add them to
> things like the C++ toplevel new/delete operators).
> > 
> > For example
> > 
> > /* All the following functions read memory pointed to by
> >their second argument.  strcat/strncat additionally
> >reads memory pointed to by the first argument.  */
> > case BUILT_IN_STRCAT:
> > case BUILT_IN_STRNCAT:
> >   {
> > ao_ref dref;
> > ao_ref_init_from_ptr_and_size (&dref,
> >                                gimple_call_arg (call, 0),
> >                                NULL_TREE);
> > if (refs_may_alias_p_1 (&dref, ref, false))
> >   return true;
> >   }
> > 
> > would be covered by "1WR" (W also implies read).  So the question
> > would be whether at least parts of your patch could be implemented
> > by making gimple_call_fnspec () return pre-determined fn-specs
> > for BUILT_IN_* (like we do with internal_fn_fnspec).
> 
> One important change would be that we still need to figure out if the
> function has other side effects.  Currently ref_maybe_used_by_call_p_1
> first tries to prove that ref is local and can not be used by the
> function unless it is passed as parameter value and then it looks into
> fnspec if the parameter is used.
> 
> For builtins it constructs tests based on the builtin type and bypasses
> the rest since it knows that all side effects are understood.
> 
> As far as I can tell, fn spec is used only by Fortran for library calls,
> so perhaps it would not be too hard to turn fn spec semantics from "some
> properties of parameter handling are known" to "all memory accesses are
> known" or have an additional way to indicate this.

Yeah, currently "fn spec" only says we know everything with respect
to a specific parameter but not the whole function.  We'd naturally
say the function is 'const' besides what is specified by "fn spec" but
of course we cannot really use 'const' for this now.

> I guess space in return value predictor could be reserved for this, or
> we could have an extra flags section at the beginning.

Yeah, I think we want pure/const/unknown, so p,c,u with
upper case denoting 'errno' is clobbered (eh...).

> > 
> > >  We would need
> > > to invent how to add info that a non pure/const function only accesses
> > > the argument according to fn spec, add a way to represent sizes of accesses
> > > and how to handle errno.
> > 
> > True - fn-spec lacks an ability to specify access size - fortunately
> > it's internal and we could change it ;)  Whether or not sth can clobber
> > errno is sth we eventually need to move to the stmt level anyway
> > (-f[no-]math-errno crossing inline boundary) - bit sad to waste a
> > bit in a gimple call for this though.  OTOH we waste a ton of space
> > in the call_used/call_clobbered members due to bad packing
> > (unsigned int bitfield followed by a pointer to a bitmap).  Eventually
> > we could offload this kind of info from the main stmt.  We could
> > also allocate a fake UID to represent errno (but then the rest of
> > the bits in call_used/call_clobbered have to be precise) - but note
> > call_used/call_clobbered does not transfer during inlining.
> > 
> > So for the access size issue we could blow up fnspec by factor of two,
> > reserving two chars per argument/return value and do
> > 
> >  1.W3R.
> 
> We also have known bytesizes like return values of sincos
> (where the size really depends on the size of a type that is known only at
> runtime).
> That can be 3 digits if represented decimally (or two digits hex, but
> it may become too small in future with vector types)

Hmm, we can use Wt for 'formal argument type', so sincos 'double *'
would mean 'double'?  Not sure if that's enough for all cases.

> One option would be to make the unit the size of what the pointer points to,
> that would turn it into 1 in all cases we handle. Then we could blow up
> by 3 and have something like P1 meaning "size is described by parameter
> number 1" and 

Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Jan Hubicka
> 
> Hmm, no - it expects the gimple_call_use/clobber_set to include
> actions of the called function itself (const functions none here)
> but for passed-by-value the loads may be in the call itself
> which is where it uses gimple_call_arg_flags to skip unused ones.
> 
> But note that PTA uses gimple_call_arg_flags to compute
> the gimple_call_use/clobber_set (but only in IPA mode) and the individual 
> BUILT_IN_* handlings in tree-ssa-alias.c do what PTA would do
> here, just directly as alias queries.  Originally I tried adding
> "fn spec"s to BUILT_IN_* decls but that got out-of-hands ...

Yep, I was also looking into that, it looks ugly and moreover the
attributes that depend on flag_math_errno and friends do not really
work for function-specific attributes, so it is a bug that we handle
them this way.

Packing this into a descriptor string in fnspec rather than a descriptor
structure returned on the side works for me (and we could add them to
things like the C++ toplevel new/delete operators).
> 
> For example
> 
> /* All the following functions read memory pointed to by
>their second argument.  strcat/strncat additionally
>reads memory pointed to by the first argument.  */
> case BUILT_IN_STRCAT:
> case BUILT_IN_STRNCAT:
>   {
> ao_ref dref;
> ao_ref_init_from_ptr_and_size (&dref,
>                                gimple_call_arg (call, 0),
>                                NULL_TREE);
> if (refs_may_alias_p_1 (&dref, ref, false))
>   return true;
>   }
> 
> would be covered by "1WR" (W also implies read).  So the question
> would be whether at least parts of your patch could be implemented
> by making gimple_call_fnspec () return pre-determined fn-specs
> for BUILT_IN_* (like we do with internal_fn_fnspec).

One important change would be that we still need to figure out if the
function has other side effects.  Currently ref_maybe_used_by_call_p_1
first tries to prove that ref is local and can not be used by the
function unless it is passed as parameter value and then it looks into
fnspec if the parameter is used.

For builtins it constructs tests based on the builtin type and bypasses
the rest since it knows that all side effects are understood.

As far as I can tell, fn spec is used only by Fortran for library calls,
so perhaps it would not be too hard to turn fn spec semantics from "some
properties of parameter handling are known" to "all memory accesses are
known" or have an additional way to indicate this.

I guess space in return value predictor could be reserved for this, or
we could have an extra flags section at the beginning.

> 
> >  We would need
> > to invent how to add info that a non pure/const function only accesses
> > the argument according to fn spec, add a way to represent sizes of accesses
> > and how to handle errno.
> 
> True - fn-spec lacks an ability to specify access size - fortunately
> it's internal and we could change it ;)  Whether or not sth can clobber
> errno is sth we eventually need to move to the stmt level anyway
> (-f[no-]math-errno crossing inline boundary) - bit sad to waste a
> bit in a gimple call for this though.  OTOH we waste a ton of space
> in the call_used/call_clobbered members due to bad packing
> (unsigned int bitfield followed by a pointer to a bitmap).  Eventually
> we could offload this kind of info from the main stmt.  We could
> also allocate a fake UID to represent errno (but then the rest of
> the bits in call_used/call_clobbered have to be precise) - but note
> call_used/call_clobbered does not transfer during inlining.
> 
> So for the access size issue we could blow up fnspec by factor of two,
> reserving two chars per argument/return value and do
> 
>  1.W3R.

We also have known bytesizes like return values of sincos
(where the size really depends on the size of a type that is known only at
runtime).
That can be 3 digits if represented decimally (or two digits hex, but
it may become too small in future with vector types)

One option would be to make the unit the size of what the pointer points to,
that would turn it into 1 in all cases we handle. Then we could blow up
by 3 and have something like P1 meaning "size is described by parameter
number 1" and "10" meaning 10 elements are written by the function.
> 
> for strncat plus add gimple_call_arg_size to access it.  The 2nd
> slot in return value could host the errno thing (eh).
> 
>  .e
> 
> for sqrt for example.
> 
> Just to repeat again - the info looks too similar to warrant an
> extra mechanism to record it.

No problem, if we just figure out a way to do so :)
> 
> Richard.
> 
> > Honza
> > > 
> > > > gcc/ChangeLog:
> > > > 
> > > > 2020-09-28  Jan Hubicka  
> > > > 
> > > > * tree-ssa-alias.c (ao_classify_builtin): New function 
> > > > commonizing
> > > > logic from ...
> > > > (ref_maybe_used_by_call_p_1): ... here.
> > > > (call_may_clobber_ref_p_1): ... and here.
> > > >   

Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Richard Biener
On Mon, 28 Sep 2020, Jan Hubicka wrote:

> > On Mon, 28 Sep 2020, Jan Hubicka wrote:
> > 
> > > Hi,
> > > ipa-reference, ipa-pure-const and ipa-modref could use the knowledge
> > > about builtins which is currently hardwired into
> > > ref_maybe_used_by_call_p_1, call_may_clobber_ref_p_1 and the PTA
> > > computation.  This patch breaks out logic implemented in the first two
> > > into a form of a simple descriptor that can be used by the IPA passes
> > > (and other code).
> > > 
> > > I was considering an option of putting this into the def file but I do
> > > not think it is feasible without cluttering it quite a lot.
> > > 
> > > For ipa-modref I implemented a dump informing about missing builtins.
> > > strlen, sqrt and exp seem common offenders, but that can be handled
> > > incrementally if the approach looks reasonable.
> > > I would also look at adding the description for PTA (perhaps with some
> > > special cases remaining since it is more ad-hoc)
> > > 
> > > Bootstrapped/regtested x86_64-linux, OK?
> > 
> > Hmm, this looks awfully similar to fn-spec handling done in
> > process_args via gimple_call_arg_flags ()?
> 
> Kind of similar (I was also looking into this, since with modref we
> could detect noescape attribute rather easily).
> 
> I am not quite sure the attribute machinery overlaps that well with what
> we do with builtins.  tree-ssa-alias only uses gimple_call_arg_flags to
> detect that a pure/const function does not use its argument.

Hmm, no - it expects the gimple_call_use/clobber_set to include
actions of the called function itself (const functions none here)
but for passed-by-value the loads may be in the call itself
which is where it uses gimple_call_arg_flags to skip unused ones.

But note that PTA uses gimple_call_arg_flags to compute
the gimple_call_use/clobber_set (but only in IPA mode) and the individual 
BUILT_IN_* handlings in tree-ssa-alias.c do what PTA would do
here, just directly as alias queries.  Originally I tried adding
"fn spec"s to BUILT_IN_* decls but that got out-of-hands ...

For example

/* All the following functions read memory pointed to by
   their second argument.  strcat/strncat additionally
   reads memory pointed to by the first argument.  */
case BUILT_IN_STRCAT:
case BUILT_IN_STRNCAT:
  {
ao_ref dref;
ao_ref_init_from_ptr_and_size (&dref,
                               gimple_call_arg (call, 0),
                               NULL_TREE);
if (refs_may_alias_p_1 (&dref, ref, false))
  return true;
  }

would be covered by "1WR" (W also implies read).  So the question
would be whether at least parts of your patch could be implemented
by making gimple_call_fnspec () return pre-determined fn-specs
for BUILT_IN_* (like we do with internal_fn_fnspec).
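
A minimal sketch of that direction (assumed shape, not existing GCC code):

  static const char *
  builtin_fnspec (built_in_function fn)
  {
    switch (fn)
      {
      case BUILT_IN_STRCAT:
      case BUILT_IN_STRNCAT:
        /* "1WR": returns argument 1; argument 1 is written (which
           implies read); argument 2 is read.  */
        return "1WR";
      default:
        return NULL;
      }
  }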

>  We would need
> to invent how to add info that a non pure/const function only accesses the
> argument according to fn spec, add a way to represent sizes of accesses
> and how to handle errno.

True - fn-spec lacks an ability to specify access size - fortunately
it's internal and we could change it ;)  Whether or not sth can clobber
errno is sth we eventually need to move to the stmt level anyway
(-f[no-]math-errno crossing inline boundary) - bit sad to waste a
bit in a gimple call for this though.  OTOH we waste a ton of space
in the call_used/call_clobbered members due to bad packing
(unsigned int bitfield followed by a pointer to a bitmap).  Eventually
we could offload this kind of info from the main stmt.  We could
also allocate a fake UID to represent errno (but then the rest of
the bits in call_used/call_clobbered have to be precise) - but note
call_used/call_clobbered does not transfer during inlining.

So for the access size issue we could blow up fnspec by factor of two,
reserving two chars per argument/return value and do

 1.W3R.

for strncat plus add gimple_call_arg_size to access it.  The 2nd
slot in return value could host the errno thing (eh).

 .e

for sqrt for example.
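
For concreteness, a sketch of reading such a doubled encoding (an
assumption about the proposal, not existing code): slot 0 describes the
return value and slot N argument N, with the second character carrying the
access size (or, in the return slot, the errno flag as in ".e"):

  static inline char
  fnspec_mode (const char *spec, unsigned slot)
  {
    return spec[2 * slot];        /* '1', 'W', 'R', '.', ...  */
  }

  static inline char
  fnspec_aux (const char *spec, unsigned slot)
  {
    return spec[2 * slot + 1];    /* size digit, 'e' for errno, or '.'  */
  }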

Just to repeat again - the info looks too similar to warrant an
extra mechanism to record it.

Richard.

> Honza
> > 
> > > gcc/ChangeLog:
> > > 
> > > 2020-09-28  Jan Hubicka  
> > > 
> > >   * tree-ssa-alias.c (ao_classify_builtin): New function commonizing
> > >   logic from ...
> > >   (ref_maybe_used_by_call_p_1): ... here.
> > >   (call_may_clobber_ref_p_1): ... and here.
> > >   * tree-ssa-alias.h (enum ao_function_flags): New enum.
> > >   (struct ao_function_info): New structure.
> > >   (ao_classify_builtin): Declare.
> > > 
> > > diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
> > > index 1dd02c0ea62..eecb8da6dd7 100644
> > > --- a/gcc/tree-ssa-alias.h
> > > +++ b/gcc/tree-ssa-alias.h
> > > @@ -108,6 +108,33 @@ ao_ref::max_size_known_p () const
> > >return known_size_p (max_size);
> > >  }
> > >  
> > > +/* Flags used in ao_function_info.  */
> > > +
> > > +enum ao_function_flags
> > > +{
> 

Re: [PATCH V2] aarch64: Do not alter force_reg returned rtx expanding pauth builtins

2020-09-28 Thread Richard Sandiford
Andrea Corallo  writes:
> Hi all,
>
> here is the reworked patch addressing Richard's suggestions.
>
> Regtested and bootstrapped on aarch64-linux-gnu.
>
> Okay for trunk?

OK, thanks.

Richard


Re: [PATCH 4/4] libstdc++: Rearrange some range adaptors' data members

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:

Since the standard range adaptors are specified to derive from the empty
class view_base, making their first data member store the underlying
view is suboptimal, for if the underlying view also derives from
view_base then the two view_base subobjects will be adjacent, thus
preventing the compiler from applying the empty base optimization to
elide away the storage for these two empty bases.

This patch improves the situation by declaring the _M_base data member
last instead of first in each range adaptor that has more than one data
member, so that the empty base optimization can apply more often.
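
A minimal illustration of the clash (not from the patch):

  struct B { };                          // stand-in for view_base
  struct Inner : B { int *p; };          // an underlying view

  struct First : B { Inner v; int n; };  // v's B subobject would collide
                                         // with First's, so v cannot be
                                         // placed at offset 0
  struct Last : B { int n; Inner v; };   // reordered: no collision

  static_assert(sizeof(First) > sizeof(Last));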

Tested on x86_64-pc-linux-gnu with and without -m32.

libstdc++-v3/ChangeLog:

* include/std/ranges (filter_view::_M_base): Declare this data
member last.
(transform_view::_M_base): Likewise.
(take_view::_M_base): Likewise.
(take_while_view::_M_base): Likewise.
(drop_view::_M_base): Likewise.
(drop_while_view::_M_base): Likewise.
(join_view::_M_base): Likewise.
(split_view::_M_base): Likewise.
* testsuite/std/ranges/adaptors/sizeof.cc: Adjust expected
sizes.
---
libstdc++-v3/include/std/ranges| 17 -
.../testsuite/std/ranges/adaptors/sizeof.cc| 18 +-
2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 964a2b616a6..6fd8a85c2bf 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1250,9 +1250,9 @@ namespace views
{ return __y.__equal(__x); }
  };

-  _Vp _M_base = _Vp();
  [[no_unique_address]] __detail::__box<_Pred> _M_pred;
  [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
+  _Vp _M_base = _Vp();


The constructor's mem-initializer-list needs to be reordered, to avoid
warnings with -Wsystem-headers.




public:
  filter_view() = default;
@@ -1588,8 +1588,8 @@ namespace views
  friend _Sentinel;
};

-  _Vp _M_base = _Vp();
  [[no_unique_address]] __detail::__box<_Fp> _M_fun;
+  _Vp _M_base = _Vp();

public:
  transform_view() = default;
@@ -1695,8 +1695,8 @@ namespace views
  friend _Sentinel;
};

-  _Vp _M_base = _Vp();
  range_difference_t<_Vp> _M_count = 0;
+  _Vp _M_base = _Vp();

public:
  take_view() = default;
@@ -1842,8 +1842,8 @@ namespace views
  friend _Sentinel;
};

-  _Vp _M_base = _Vp();
  [[no_unique_address]] __detail::__box<_Pred> _M_pred;
+  _Vp _M_base = _Vp();

public:
  take_while_view() = default;
@@ -1902,8 +1902,8 @@ namespace views
class drop_view : public view_interface<drop_view<_Vp>>
{
private:
-  _Vp _M_base = _Vp();
  range_difference_t<_Vp> _M_count = 0;
+  _Vp _M_base = _Vp();

  // ranges::next(begin(base), count, end(base)) is O(1) if _Vp satisfies
  // both random_access_range and sized_range. Otherwise, cache its result.
@@ -2002,9 +2002,9 @@ namespace views
class drop_while_view : public view_interface<drop_while_view<_Vp>>
{
private:
-  _Vp _M_base = _Vp();
  [[no_unique_address]] __detail::__box<_Pred> _M_pred;
  [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
+  _Vp _M_base = _Vp();

public:
  drop_while_view() = default;
@@ -2300,12 +2300,11 @@ namespace views
  friend _Sentinel;
};

-  _Vp _M_base = _Vp();
-
  // XXX: _M_inner is "present only when !is_reference_v<_InnerRange>"
  [[no_unique_address]]
__detail::__maybe_present_t<!is_reference_v<_InnerRange>,
                            views::all_t<_InnerRange>> _M_inner;
+  _Vp _M_base = _Vp();

public:
  join_view() = default;
@@ -2680,8 +2679,8 @@ namespace views
  { ranges::iter_swap(__x._M_i_current(), __y._M_i_current()); }
};

-  _Vp _M_base = _Vp();
  _Pattern _M_pattern = _Pattern();
+  _Vp _M_base = _Vp();

  // XXX: _M_current is "present only if !forward_range"
  [[no_unique_address]]
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
index 5fb1ab7e4da..a7f622bb725 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
@@ -33,17 +33,17 @@ using V = ranges::subrange;
constexpr auto ptr = sizeof(int*);
static_assert(sizeof(V) == 2*ptr);

-static_assert(sizeof(ranges::take_view) == 4*ptr);
-static_assert(sizeof(ranges::drop_view) == 4*ptr);
+static_assert(sizeof(ranges::take_view) == 3*ptr);
+static_assert(sizeof(ranges::drop_view) == 3*ptr);

-static_assert(sizeof(ranges::filter_view) == 5*ptr);
-static_assert(sizeof(ranges::take_while_view) == 4*ptr);
-static_assert(sizeof(ranges::drop_while_view) == 5*ptr);
-static_assert(sizeof(ranges::transform_view) == 4*ptr);

Re: [PATCH 3/4] libstdc++: Add test that tracks range adaptors' sizes

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:

libstdc++-v3/ChangeLog:

* testsuite/std/ranges/adaptors/sizeof.cc: New test.


OK.


.../testsuite/std/ranges/adaptors/sizeof.cc   | 49 +++
1 file changed, 49 insertions(+)
create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc

diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
new file mode 100644
index 000..5fb1ab7e4da
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
@@ -0,0 +1,49 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+
+#include 
+#include 
+
+namespace ranges = std::ranges;
+
+auto pred_f(int x) { return x%2 == 0; };
+auto pred_l = [] (int x) { return x%2 == 0; };
+
+auto func_f(int x) { return x*x; }
+auto func_l = [] (int x) { return x*x; };
+
+using V = ranges::subrange;
+constexpr auto ptr = sizeof(int*);
+static_assert(sizeof(V) == 2*ptr);
+
+static_assert(sizeof(ranges::take_view) == 4*ptr);
+static_assert(sizeof(ranges::drop_view) == 4*ptr);
+
+static_assert(sizeof(ranges::filter_view) == 5*ptr);
+static_assert(sizeof(ranges::take_while_view) == 4*ptr);
+static_assert(sizeof(ranges::drop_while_view) == 5*ptr);
+static_assert(sizeof(ranges::transform_view) == 4*ptr);
+
+static_assert(sizeof(ranges::filter_view) == 4*ptr);
+static_assert(sizeof(ranges::take_while_view) == 3*ptr);
+static_assert(sizeof(ranges::drop_while_view) == 4*ptr);
+static_assert(sizeof(ranges::transform_view) == 3*ptr);
+
+static_assert(sizeof(ranges::split_view) == 5*ptr);
--
2.28.0.618.g9bc233ae1c





Re: [PATCH 1/4] libstdc++: Reduce the size of an unbounded iota_view

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:

libstdc++-v3/ChangeLog:

* include/std/ranges (iota_view::_M_bound): Give it
[[no_unique_address]].
* testsuite/std/ranges/iota/iota_view.cc: Check that an
unbounded iota_view has minimal size.


OK.


libstdc++-v3/include/std/ranges | 2 +-
libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index ed04fa0001d..964a2b616a6 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -511,7 +511,7 @@ namespace ranges
  };

  _Winc _M_value = _Winc();
-  _Bound _M_bound = _Bound();
+  [[no_unique_address]] _Bound _M_bound = _Bound();

public:
  iota_view() = default;
diff --git a/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc 
b/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
index 65d166fbd3b..8a33e10a093 100644
--- a/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
+++ b/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
@@ -77,6 +77,9 @@ test04()
  VERIFY( it == v.end() );
}

+// Verify we optimize away the 'bound' data member of an unbounded iota_view.
+static_assert(sizeof(std::ranges::iota_view) == 1);
+
int
main()
{
--
2.28.0.618.g9bc233ae1c





Re: [PATCH 2/4] libstdc++: Reduce the size of a subrange with empty sentinel type

2020-09-28 Thread Jonathan Wakely via Gcc-patches

On 28/09/20 00:48 -0400, Patrick Palka via Libstdc++ wrote:

libstdc++-v3/ChangeLog:

* include/bits/ranges_util.h (subrange::_M_end): Give it
[[no_unique_address]].
* testsuite/std/ranges/subrange/sizeof.cc: New test.


OK.


libstdc++-v3/include/bits/ranges_util.h   |  2 +-
.../testsuite/std/ranges/subrange/sizeof.cc   | 28 +++
2 files changed, 29 insertions(+), 1 deletion(-)
create mode 100644 libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc

diff --git a/libstdc++-v3/include/bits/ranges_util.h 
b/libstdc++-v3/include/bits/ranges_util.h
index 694ae796399..a98658ff5c8 100644
--- a/libstdc++-v3/include/bits/ranges_util.h
+++ b/libstdc++-v3/include/bits/ranges_util.h
@@ -208,7 +208,7 @@ namespace ranges
= _Kind == subrange_kind::sized && !sized_sentinel_for<_Sent, _It>;

  _It _M_begin = _It();
-  _Sent _M_end = _Sent();
+  [[no_unique_address]] _Sent _M_end = _Sent();

  template
struct _Size
diff --git a/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc
new file mode 100644
index 000..7e95e196c6c
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+
+#include 
+#include 
+
+// Verify we optimize away the 'end' data member of a subrange with an empty
+// sentinel type.
+static_assert(sizeof(std::ranges::subrange<std::counted_iterator<int*>,
+                                           std::default_sentinel_t>)
+             == sizeof(std::counted_iterator<int*>));
--
2.28.0.618.g9bc233ae1c





[PATCH 2/2] arm: Improve handling of relocations with small offsets with -mpure-code on v6m (PR96770)

2020-09-28 Thread Christophe Lyon via Gcc-patches
With -mpure-code on v6m (thumb-1), we can use small offsets with
upper/lower relocations to avoid the extra addition of the
offset.

This patch accepts expressions symbol+offset as legitimate constants
when the literal pool is disabled, making sure that the offset is
within the range supported by thumb-1 [0..255].

It also makes sure that thumb1_movsi_insn emits an error in case we
try to use it with an unsupported RTL construct.

2020-09-28  Christophe Lyon  

gcc/
* config/arm/arm.c (thumb_legitimate_constant_p): Accept
(symbol_ref + addend) when literal pool is disabled.
(arm_valid_symbolic_address_p): Add support for thumb-1 without
MOVT/MOVW.
* config/arm/thumb1.md (*thumb1_movsi_insn): Accept (symbol_ref +
addend) in the pure-code alternative.

gcc/testsuite/
* gcc.target/arm/pure-code/pr96770.c: New test.
---
 gcc/config/arm/arm.c | 15 ---
 gcc/config/arm/thumb1.md |  5 +++--
 gcc/testsuite/gcc.target/arm/pure-code/pr96770.c | 21 +
 3 files changed, 36 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pure-code/pr96770.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index abe357e..ceeb91f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9489,7 +9489,8 @@ thumb_legitimate_constant_p (machine_mode mode 
ATTRIBUTE_UNUSED, rtx x)
 we build the symbol address with upper/lower
 relocations.  */
  || (TARGET_THUMB1
- && GET_CODE (x) == SYMBOL_REF
+ && !label_mentioned_p (x)
+ && arm_valid_symbolic_address_p (x)
  && arm_disable_literal_pool)
  || flag_pic);
 }
@@ -31495,7 +31496,10 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx 
out, rtx in,
According to the ARM ELF ABI, the initial addend of REL-type relocations
processing MOVW and MOVT instructions is formed by interpreting the 16-bit
literal field of the instruction as a 16-bit signed value in the range
-   -32768 <= A < 32768.  */
+   -32768 <= A < 32768.
+
+   In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
+   unsigned range of 0 <= A < 256.  */
 
 bool
 arm_valid_symbolic_address_p (rtx addr)
@@ -31519,7 +31523,12 @@ arm_valid_symbolic_address_p (rtx addr)
   xop1 = XEXP (tmp, 1);
 
   if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
- return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
+   {
+ if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
+   return IN_RANGE (INTVAL (xop1), 0, 0xff);
+ else
+   return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
+   }
 }
 
   return false;
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 3dedcae..2258a52 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -675,7 +675,7 @@ (define_insn "*thumb1_movsi_insn"
   case 7:
   /* pure-code alternative: build the constant byte by byte,
 instead of loading it from a constant pool.  */
-   if (GET_CODE (operands[1]) == SYMBOL_REF)
+   if (arm_valid_symbolic_address_p (operands[1]))
  {
output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
output_asm_insn (\"lsls\\t%0, #8\", operands);
@@ -686,7 +686,7 @@ (define_insn "*thumb1_movsi_insn"
output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
return \"\";
  }
-   else
+   else if (GET_CODE (operands[1]) == CONST_INT)
  {
int i;
HOST_WIDE_INT op1 = INTVAL (operands[1]);
@@ -721,6 +721,7 @@ (define_insn "*thumb1_movsi_insn"
  output_asm_insn ("adds\t%0, %1", ops);
return "";
  }
+ gcc_unreachable ();
 
   case 8: return "ldr\t%0, %1";
   case 9: return "str\t%1, %0";
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pr96770.c 
b/gcc/testsuite/gcc.target/arm/pure-code/pr96770.c
new file mode 100644
index 000..a43d71f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pure-code/pr96770.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mpure-code" } */
+
+int arr[1000];
+int *f4 (void) { return &arr[1]; }
+
+/* For cortex-m0 (thumb-1/v6m), we generate 4 movs with upper/lower:#arr+4.  */
+/* { dg-final { scan-assembler-times "\\+4" 4 { target { { ! 
arm_thumb1_movt_ok } && { ! arm_thumb2_ok } } } } } */
+
+/* For cortex-m with movt/movw (thumb-1/v8m.base or thumb-2), we
+   generate a movt/movw pair with upper/lower:#arr+4.  */
+/* { dg-final { scan-assembler-times "\\+4" 2 { target { arm_thumb1_movt_ok || 
arm_thumb2_ok } } } } */
+
+int *f5 (void) { return &arr[80]; }
+
+/* For cortex-m0 (thumb-1/v6m), we generate 1 ldr from rodata pointer to 
arr+320.  */
+/* { dg-final { scan-assembler-times "\\+320" 1 { target { { ! 
arm_thumb1_movt_ok } && { ! arm_thumb2_ok } } } } } */
+
+/* For cortex-m with 

[PATCH 1/2] arm: Avoid indirection with -mpure-code on v6m (PR96967)

2020-09-28 Thread Christophe Lyon via Gcc-patches
With -mpure-code on v6m (thumb-1), to avoid a useless indirection when
building the address of a symbol, we want to consider SYMBOL_REF as a
legitimate constant. This way, we build the address using a series of
upper/lower relocations instead of loading the address from memory.
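
For illustration, for a global 'x' the expected expansion (register choice
arbitrary) is the sequence emitted by the pure-code alternative below:

  int x;
  int *addr_of_x (void) { return &x; }

  /* expands to:
        movs    r0, #:upper8_15:x
        lsls    r0, #8
        adds    r0, #:upper0_7:x
        lsls    r0, #8
        adds    r0, #:lower8_15:x
        lsls    r0, #8
        adds    r0, #:lower0_7:x  */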

This patch also fixes a missing "clob" conds attribute for
thumb1_movsi_insn, needed because that alternative clobbers the flags.

2020-09-28  Christophe Lyon  

gcc/
* config/arm/arm.c (thumb_legitimate_constant_p): Add support for
disabled literal pool in thumb-1.
* config/arm/thumb1.md (thumb1_movsi_symbol_ref): Remove.
(*thumb1_movsi_insn): Add support for SYMBOL_REF with -mpure-code.

gcc/testsuite
* gcc.target/arm/pure-code/pr96767.c: New test.
---
 gcc/config/arm/arm.c |   6 ++
 gcc/config/arm/thumb1.md | 102 +++
 gcc/testsuite/gcc.target/arm/pure-code/pr96767.c |  10 +++
 3 files changed, 63 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pure-code/pr96767.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 022ef6c..abe357e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9485,6 +9485,12 @@ thumb_legitimate_constant_p (machine_mode mode 
ATTRIBUTE_UNUSED, rtx x)
  || CONST_DOUBLE_P (x)
  || CONSTANT_ADDRESS_P (x)
  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
+ /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
+we build the symbol address with upper/lower
+relocations.  */
+ || (TARGET_THUMB1
+ && GET_CODE (x) == SYMBOL_REF
+ && arm_disable_literal_pool)
  || flag_pic);
 }
 
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 4a59d87..3dedcae 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -43,27 +43,6 @@
 
 
 
-(define_insn "thumb1_movsi_symbol_ref"
-  [(set (match_operand:SI 0 "register_operand" "=l")
-   (match_operand:SI 1 "general_operand" ""))
-   ]
-  "TARGET_THUMB1
-   && arm_disable_literal_pool
-   && GET_CODE (operands[1]) == SYMBOL_REF"
-  "*
-  output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
-  output_asm_insn (\"lsls\\t%0, #8\", operands);
-  output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands);
-  output_asm_insn (\"lsls\\t%0, #8\", operands);
-  output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands);
-  output_asm_insn (\"lsls\\t%0, #8\", operands);
-  output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
-  return \"\";
-  "
-  [(set_attr "length" "14")
-   (set_attr "conds" "clob")]
-)
-
 (define_insn "*thumb1_adddi3"
   [(set (match_operand:DI  0 "register_operand" "=l")
(plus:DI (match_operand:DI 1 "register_operand" "%0")
@@ -696,40 +675,53 @@ (define_insn "*thumb1_movsi_insn"
   case 7:
   /* pure-code alternative: build the constant byte by byte,
 instead of loading it from a constant pool.  */
-   {
- int i;
- HOST_WIDE_INT op1 = INTVAL (operands[1]);
- bool mov_done_p = false;
- rtx ops[2];
- ops[0] = operands[0];
-
- /* Emit upper 3 bytes if needed.  */
- for (i = 0; i < 3; i++)
-   {
-  int byte = (op1 >> (8 * (3 - i))) & 0xff;
-
- if (byte)
-   {
- ops[1] = GEN_INT (byte);
- if (mov_done_p)
-   output_asm_insn ("adds\t%0, %1", ops);
- else
-   output_asm_insn ("movs\t%0, %1", ops);
- mov_done_p = true;
-   }
-
- if (mov_done_p)
-   output_asm_insn ("lsls\t%0, #8", ops);
-   }
+   if (GET_CODE (operands[1]) == SYMBOL_REF)
+ {
+   output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
+   output_asm_insn (\"lsls\\t%0, #8\", operands);
+   output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands);
+   output_asm_insn (\"lsls\\t%0, #8\", operands);
+   output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands);
+   output_asm_insn (\"lsls\\t%0, #8\", operands);
+   output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
+   return \"\";
+ }
+   else
+ {
+   int i;
+   HOST_WIDE_INT op1 = INTVAL (operands[1]);
+   bool mov_done_p = false;
+   rtx ops[2];
+   ops[0] = operands[0];
+
+   /* Emit upper 3 bytes if needed.  */
+   for (i = 0; i < 3; i++)
+ {
+   int byte = (op1 >> (8 * (3 - i))) & 0xff;
+
+   if (byte)
+ {
+   ops[1] = GEN_INT (byte);
+   if (mov_done_p)
+ output_asm_insn ("adds\t%0, %1", ops);
+   else
+ output_asm_insn ("movs\t%0, %1", ops);
+   
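
For reference, the byte-by-byte expansion above would emit, for
operands[1] == 0x12345678, a sequence along these lines (an
illustrative sketch, not taken from the patch):

	movs	r0, #0x12
	lsls	r0, #8
	adds	r0, #0x34
	lsls	r0, #8
	adds	r0, #0x56
	lsls	r0, #8
	adds	r0, #0x78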

[PATCH V2] aarch64: Do not alter force_reg returned rtx expanding pauth builtins

2020-09-28 Thread Andrea Corallo
Hi all,

Here is the reworked patch addressing Richard's suggestions.

Regtested and bootstrapped on aarch64-linux-gnu.

Okay for trunk?

Thanks!

  Andrea

From 946d22aa247f2d1bb0c6b10a6e6db415b34feff2 Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Mon, 21 Sep 2020 13:52:45 +0100
Subject: [PATCH] aarch64: Do not alter force_reg returned rtx expanding pauth
 builtins

2020-09-21  Andrea Corallo  

* config/aarch64/aarch64-builtins.c
(aarch64_general_expand_builtin): Do not alter value on a
force_reg returned rtx.
---
 gcc/config/aarch64/aarch64-builtins.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index b787719cf5e..d7eb8772b14 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2079,20 +2079,13 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
   arg0 = CALL_EXPR_ARG (exp, 0);
   op0 = force_reg (Pmode, expand_normal (arg0));
 
-  if (!target)
-   target = gen_reg_rtx (Pmode);
-  else
-   target = force_reg (Pmode, target);
-
-  emit_move_insn (target, op0);
-
   if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
{
  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
  icode = CODE_FOR_xpaclri;
  emit_move_insn (lr, op0);
  emit_insn (GEN_FCN (icode) ());
- emit_move_insn (target, lr);
+ return lr;
}
   else
{
@@ -2122,11 +2115,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree 
exp, rtx target,
  emit_move_insn (x17_reg, op0);
  emit_move_insn (x16_reg, op1);
  emit_insn (GEN_FCN (icode) ());
- emit_move_insn (target, x17_reg);
+ return x17_reg;
}
 
-  return target;
-
 case AARCH64_JSCVT:
   {
expand_operand ops[2];
-- 
2.17.1
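
In sketch form, the XPACLRI half of the change (identifiers as in the
patch; surrounding code elided):

  /* Before: OP0 was copied into a force_reg'd TARGET and LR was
     copied back into it.  After: use LR directly and return it, so
     the rtx returned by force_reg is never written to.  */
  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
  emit_move_insn (lr, op0);
  emit_insn (GEN_FCN (CODE_FOR_xpaclri) ());
  return lr;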



Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Jan Hubicka
> On Mon, 28 Sep 2020, Jan Hubicka wrote:
> 
> > Hi,
> > ipa-reference, ipa-pure-const and ipa-modref could use the knowledge
> > about builtins which is currently hardwired into
> > ref_maybe_used_by_call_p_1, call_may_clobber_ref_p_1 and the PTA
> > computation.  This patch breaks the logic implemented in the first two
> > out into a simple descriptor that can be used by the IPA passes
> > (and other code).
> > 
> > I was considering an option of putting this into a def file, but I do not
> > think it is feasible without cluttering it quite a lot.
> > 
> > For ipa-modref I implemented a dump informing about missing builtins.
> > strlen, sqrt and exp seem to be common offenders, but that can be handled
> > incrementally if the approach looks reasonable.
> > I would also look into adding the description for PTA (perhaps with some
> > special cases remaining since it is more ad-hoc)
> > 
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> Hmm, this looks awfully similar to fn-spec handling done in
> process_args via gimple_call_arg_flags ()?

Kind of similar (I was also looking into this, since with modref we
could detect the noescape attribute rather easily).

I am not quite sure the attribute machinery overlaps that well with what
we do with builtins.  tree-ssa-alias only uses gimple_call_arg_flags to
detect that a pure/const function does not use its argument.  We would
need to invent a way to add info that a non-pure/const function only
accesses the argument according to the fn spec, add a way to represent
the sizes of accesses, and decide how to handle errno.

Honza
> 
> > gcc/ChangeLog:
> > 
> > 2020-09-28  Jan Hubicka  
> > 
> > * tree-ssa-alias.c (ao_classify_builtin): New function commonizing
> > logic from ...
> > (ref_maybe_used_by_call_p_1): ... here.
> > (call_may_clobber_ref_p_1): ... and here.
> > * tree-ssa-alias.h (enum ao_function_flags): New enum.
> > (struct ao_function_info): New structure.
> > (ao_classify_builtin): Declare.
> > 
> > diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
> > index 1dd02c0ea62..eecb8da6dd7 100644
> > --- a/gcc/tree-ssa-alias.h
> > +++ b/gcc/tree-ssa-alias.h
> > @@ -108,6 +108,33 @@ ao_ref::max_size_known_p () const
> >return known_size_p (max_size);
> >  }
> >  
> > +/* Flags used in ao_function_info.  */
> > +
> > +enum ao_function_flags
> > +{
> > +  AO_FUNCTION_BARRIER = 1,
> > +  AO_FUNCTION_ERRNO = 2,
> > +};
> > +
> > +/* Describe side effects relevant for alias analysis of function call to
> > +   DECL.  */
> > +
> > +struct ao_function_info
> > +{
> > +  int num_param_reads;  /* Number of parameters function reads from,
> > +  -1 if reads are unknown.  */
> > +  struct ao_access_info
> > +{
> > +  char param;  /* Index of parameter read/written from.  */
> > +  char size_param; /* Index of parameter specifying size of the access,
> > +  -1 if unknown.  */
> > +  char size;   /* Size of access if known, 0 if unknown.  */
> > +} reads[2];
> > +  int num_param_writes;
> > +  struct ao_access_info writes[2];
> > +  enum ao_function_flags flags;
> > +};
> > +
> >  /* In tree-ssa-alias.c  */
> >  extern void ao_ref_init (ao_ref *, tree);
> >  extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
> > @@ -158,6 +185,7 @@ extern void debug (pt_solution *ptr);
> >  extern void dump_points_to_info_for (FILE *, tree);
> >  extern void debug_points_to_info_for (tree);
> >  extern void dump_alias_stats (FILE *);
> > +extern bool ao_classify_builtin (tree callee, ao_function_info *info);
> >  
> >  
> >  /* In tree-ssa-structalias.c  */
> > diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
> > index fe390d4ffbe..c182e7bb39c 100644
> > --- a/gcc/tree-ssa-alias.c
> > +++ b/gcc/tree-ssa-alias.c
> > @@ -2503,6 +2503,507 @@ modref_may_conflict (const gimple *stmt,
> >return false;
> >  }
> >  
> > +/* If CALLEE has known side effects, fill in INFO and return true.
> > +   See tree-ssa-structalias.c:find_func_aliases
> > +   for the list of builtins we might need to handle here.  */
> > +
> > +bool
> > +ao_classify_builtin (tree callee, ao_function_info *info)
> > +{
> > +  built_in_function code = DECL_FUNCTION_CODE (callee);
> > +
> > +  switch (code)
> > +{
> > +  /* All the following functions read memory pointed to by
> > +their second argument and write memory pointed to by first
> > +argument.
> > +strcat/strncat additionally reads memory pointed to by the first
> > +argument.  */
> > +  case BUILT_IN_STRCAT:
> > +   {
> > + const static struct ao_function_info ret_info
> > +  = {
> > +   2,  /* num_param_reads.  */
> > +
> > +   /* Reads and write descriptors are triples containing:
> > +  - index of parameter read
> > +  - index of parameter specifying access size
> > +(-1 if unknown)
> > +  - access size in bytes (0 if 
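
To give a feel for how the descriptor could be consumed, a minimal
hypothetical sketch (the helper below is invented; only
ao_classify_builtin and ao_function_info come from the patch):

  /* Return true if CALLEE is a builtin whose read set is fully
     described by the proposed descriptor.  */
  static bool
  reads_fully_described_p (tree callee)
  {
    struct ao_function_info info;
    if (!fndecl_built_in_p (callee, BUILT_IN_NORMAL)
        || !ao_classify_builtin (callee, &info))
      return false;
    /* num_param_reads == -1 means the read set is unknown.  */
    return info.num_param_reads >= 0;
  }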

Re: [PATCH] switch lowering: limit number of cluster attempts

2020-09-28 Thread Richard Biener via Gcc-patches
On Fri, Sep 25, 2020 at 4:15 PM Martin Liška  wrote:
>
> On 9/25/20 3:45 PM, Richard Biener wrote:
> > On Fri, Sep 25, 2020 at 3:32 PM Martin Liška  wrote:
> >>
> >> On 9/25/20 3:18 PM, Richard Biener wrote:
> >>> On Fri, Sep 25, 2020 at 11:13 AM Martin Liška  wrote:
> 
>  Hello.
> 
>  All right, I came up with a rapid speed-up that allows us to remove
>  the introduced parameter. It contains 2 parts:
>  - BIT TEST: we allow at maximum a range that is smaller than
>  GET_MODE_BITSIZE.
>  - JT: we spend quite some time in density calculation; we can guess it
>  first, which leads to a fast bail-out.
> 
>  Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
>  Ready to be installed?
> >>>
> >>> Err
> >>>
> >>> +  auto_vec dest_bbs;
> >>> -  auto_bitmap dest_bbs;
> >>>
> >>> -  bitmap_set_bit (dest_bbs, sc->m_case_bb->index);
> >>> +  if (!dest_bbs.contains (sc->m_case_bb->index))
> >>> +   {
> >>> + dest_bbs.safe_push (sc->m_case_bb->index);
> >>> + if (dest_bbs.length () > m_max_case_bit_tests)
> >>> +   return false;
> >>> +   }
> >>
> >> That's intentional as m_max_case_bit_tests is a very small number (3) and
> >> I want to track *distinct* indices in dest_bbs. So dest_bbs.contains
> >> is a constant operation.
> >
> > You're storing bb->index where you formerly set the bb->index bit;
> > what's the difference?
> >
> > For max 3 elements a vector is OK, of course, but there should be a
> > comment that says this ;)  The static const is 'int' so it can in
> > principle hold up to two billion ;)
>
> Sure, comment is needed.
>
> >
> >>>
> >>> vec::contains is linear search so no.  Was this for the length check?
> >>> Just do
> >>>
> >>>if (bitmap_set_bit (...))
> >>> {
> >>>   length++;
> >>>   if (length > ...)
> >>
> >> I would need bitmap_count_bits here. Do you prefer it?
> >
> > bitmap_set_bit returns false if the bit was already set so you can
> > count as you add bits, see the length++ above.
>
> Ah, got it!
>
> >
> > For three elements the vec will be faster though.  May I suggest
> > to use
> >
> >   auto_vec dest_bbs;
> >
> > then and quick_push rather than safe_push (need to guard the
> > push with the max_case_bit_test).
>
> Yes.
>
> Is the patch fine with that (and Jakub's comment)?

Yes.

Thanks,
Richard.

> Martin
>
> >
> > Richard.
> >
> >
> >
> >> Martin
> >>
> >>>
>  Thanks,
>  Martin
> >>
>
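
In sketch form, the counting idiom Richard describes (the loop context
here is hypothetical; bitmap_set_bit returns true only when the bit
was previously clear):

  auto_bitmap dest_bbs;
  unsigned length = 0;

  for (unsigned i = 0; i < clusters.length (); i++)
    {
      simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
      /* Count each distinct destination block as its bit is set.  */
      if (bitmap_set_bit (dest_bbs, sc->m_case_bb->index))
        {
          length++;
          if (length > m_max_case_bit_tests)
            return false;
        }
    }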


Re: [PATCH] assorted improvements for fold_truth_andor_1

2020-09-28 Thread Richard Biener via Gcc-patches
On Fri, Sep 25, 2020 at 3:39 PM Alexandre Oliva  wrote:
>
>
> This patch introduces various improvements to the logic that merges
> field compares.
>
> Before the patch, we could merge:
>
>   (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)
>
> into something like:
>
>   (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)
>
> if both of A's fields live within the same alignment boundaries, and
> so do B's, at the same relative positions.  Constants may be used
> instead of the object B.
>
> The initial goal of this patch was to enable such combinations when a
> field crossed alignment boundaries, e.g. for packed types.  We can't
> generally access such fields with a single memory access, so when we
> come across such a compare, we will attempt to combine each access
> separately.
>
> Some merging opportunities were missed because of right-shifts,
> compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
> narrowing conversions, especially after earlier merges.  This patch
> introduces handlers for several cases involving these.
>
> Other merging opportunities were missed because of association.  The
> existing logic would only succeed in merging a pair of consecutive
> compares, or e.g. B with C in (A ANDOR B) ANDOR C, not even trying
> e.g. C and D in (A ANDOR (B ANDOR C)) ANDOR D.  I've generalized the
> handling of the rightmost compare in the left-hand operand, going for
> the leftmost compare in the right-hand operand, and then onto trying
> to merge compares pairwise, one from each operand, even if they are
> not consecutive, taking care to avoid merging operations with
> intervening side effects, including volatile accesses.
>
> When it is the second of a non-consecutive pair of compares that first
> accesses a word, we may merge the first compare with part of the
> second compare that refers to the same word, keeping the compare of
> the remaining bits at the spot where the second compare used to be.
>
> Handling compares with non-constant fields was somewhat generalized,
> now handling non-adjacent fields.  When a field of one object crosses
> an alignment boundary but the other doesn't, we issue the same load in
> both compares; gimple optimizers will later turn it into a single
> load, without our having to handle SAVE_EXPRs at this point.
>
> The logic for issuing split loads and compares, and ordering them, is
> now shared between all cases of compares with constants and with
> another object.
>
> The -Wno-error for toplev.o on rs6000 is because of toplev.c's:
>
>   if ((flag_sanitize & SANITIZE_ADDRESS)
>   && !FRAME_GROWS_DOWNWARD)
>
> and rs6000.h's:
>
> #define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0   \
>   || (flag_sanitize & SANITIZE_ADDRESS) != 0)
>
> The mutually exclusive conditions involving flag_sanitize are now
> noticed and reported by fold-const.c's:
>
>   warning (0,
>"% of mutually exclusive equal-tests"
>" is always 0");
>
> This patch enables over 12k compare-merging opportunities that we used
> to miss in a GCC bootstrap.
>
> Regstrapped on x86_64-linux-gnu and ppc64-linux-gnu.  Ok to install?

Sorry for throwing a wrench in here but doing this kind of transform
during GENERIC folding is considered a bad thing.

There are better places during GIMPLE opts to replicate (and enhance)
what fold_truth_andor does, namely ifcombine and reassoc, to mention
the two passes that do similar transforms (they are by no means exact
matches, otherwise fold_truth_andor would be gone already).

Richard.

>
> for  gcc/ChangeLog
>
> * fold-const.c (prepare_xor): New.
> (decode_field_reference): Handle xor, shift, and narrowing
> conversions.
> (all_ones_mask_p): Remove.
> (compute_split_boundary_from_align): New.
> (build_split_load, reuse_split_load): New.
> (fold_truth_andor_1): Add recursion to combine pairs of
> non-neighboring compares.  Handle xor compared with zero.
> Handle fields straddling across alignment boundaries.
> Generalize handling of non-constant rhs.
> (fold_truth_andor): Leave sub-expression handling to the
> recursion above.
> * config/rs6000/t-rs6000 (toplev.o-warn): Disable errors.
>
> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/field-merge-1.c: New.
> * gcc.dg/field-merge-2.c: New.
> * gcc.dg/field-merge-3.c: New.
> * gcc.dg/field-merge-4.c: New.
> * gcc.dg/field-merge-5.c: New.
> ---
>  gcc/config/rs6000/t-rs6000   |4
>  gcc/fold-const.c |  818 
> --
>  gcc/testsuite/gcc.dg/field-merge-1.c |   64 +++
>  gcc/testsuite/gcc.dg/field-merge-2.c |   31 +
>  gcc/testsuite/gcc.dg/field-merge-3.c |   36 +
>  gcc/testsuite/gcc.dg/field-merge-4.c |   40 ++
>  gcc/testsuite/gcc.dg/field-merge-5.c |   40 ++
>  7 files changed, 882 insertions(+), 151 deletions(-)
>  
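
As a concrete picture of the transformation, a minimal example of
mergeable field compares (illustrative only; not one of the new
testcases):

  /* Both byte-sized fields of each object sit in the same aligned
     word, so the two compares below can become one masked word
     compare; the patch extends this to fields that cross alignment
     boundaries, xor-based forms, and non-consecutive compares.  */
  struct S { char x1; char y1; };

  int
  same (struct S *a, struct S *b)
  {
    return a->x1 == b->x1 && a->y1 == b->y1;
  }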

Re: [PATCH] generalized range_query class for multiple contexts

2020-09-28 Thread Aldy Hernandez via Gcc-patches




On 9/25/20 3:17 PM, Andrew MacLeod wrote:

On 9/24/20 5:51 PM, Martin Sebor via Gcc-patches wrote:

On 9/18/20 12:38 PM, Aldy Hernandez via Gcc-patches wrote:



3. Conversion of sprintf/strlen pass to class.

This is a nonfunctional change to the sprintf/strlen passes. That is, 
no effort was made to change the passes to multi-ranges.  However, 
with this patch, we are able to plug in a ranger or evrp with just a 
few lines, since the range query mechanisms share a common API.


Thanks for doing all this!  There isn't anything I don't understand
in the sprintf changes so no questions from me (well, almost none).
Just some comments:

The current call statement is available in all functions that take
a directive argument, as dir->info.callstmt.  There should be no need
to also add it as a new argument to the functions that now need it.

The change adds code along these lines in a bunch of places:

+  value_range vr;
+  if (!query->range_of_expr (vr, arg, stmt))
+    vr.set_varying (TREE_TYPE (arg));

I thought under the new Ranger APIs when a range couldn't be
determined it would be automatically set to the maximum for
the type.  I like that and have been moving in that direction
with my code myself (rather than having an API fail, have it
set the max range and succeed).

Aldy will have to comment on why that is there; probably an oversight.
The API should return VARYING if it can't calculate a better range.  The
only time the API returns FALSE for a query is when the range is
unsupported, i.e., you ask for the range of a float statement or argument.


Right on both counts.

We could ignore the return value, or use my preferred method (which I
know Andrew hates):


gcc_assert (query->range_of_expr(vr, arg, stmt));

Aldy
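
A note on why that idiom is contentious: with assertion checking
disabled, system.h defines gcc_assert roughly as ((void)(0 && (EXPR))),
so a side-effecting argument would never be evaluated.  A sketch of a
side-effect-free variant:

  value_range vr;
  /* Keep the call outside the assert; per the above it only fails
     for unsupported types such as floats.  */
  bool ok = query->range_of_expr (vr, arg, stmt);
  gcc_assert (ok);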



Re: Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-28 Thread Richard Biener
On Mon, 28 Sep 2020, Jan Hubicka wrote:

> Hi,
> ipa-reference, ipa-pure-const and ipa-modref could use the knowledge
> about builtins which is currently hardwired into
> ref_maybe_used_by_call_p_1, call_may_clobber_ref_p_1 and the PTA
> computation.  This patch breaks the logic implemented in the first two
> out into a simple descriptor that can be used by the IPA passes
> (and other code).
> 
> I was considering an option of putting this into a def file, but I do not
> think it is feasible without cluttering it quite a lot.
> 
> For ipa-modref I implemented a dump informing about missing builtins.
> strlen, sqrt and exp seem to be common offenders, but that can be handled
> incrementally if the approach looks reasonable.
> I would also look into adding the description for PTA (perhaps with some
> special cases remaining since it is more ad-hoc)
> 
> Bootstrapped/regtested x86_64-linux, OK?

Hmm, this looks awfully similar to fn-spec handling done in
process_args via gimple_call_arg_flags ()?

> gcc/ChangeLog:
> 
> 2020-09-28  Jan Hubicka  
> 
>   * tree-ssa-alias.c (ao_classify_builtin): New function commonizing
>   logic from ...
>   (ref_maybe_used_by_call_p_1): ... here.
>   (call_may_clobber_ref_p_1): ... and here.
>   * tree-ssa-alias.h (enum ao_function_flags): New enum.
>   (struct ao_function_info): New structure.
>   (ao_classify_builtin): Declare.
> 
> diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
> index 1dd02c0ea62..eecb8da6dd7 100644
> --- a/gcc/tree-ssa-alias.h
> +++ b/gcc/tree-ssa-alias.h
> @@ -108,6 +108,33 @@ ao_ref::max_size_known_p () const
>return known_size_p (max_size);
>  }
>  
> +/* Flags used in ao_function_info.  */
> +
> +enum ao_function_flags
> +{
> +  AO_FUNCTION_BARRIER = 1,
> +  AO_FUNCTION_ERRNO = 2,
> +};
> +
> +/* Describe side effects relevant for alias analysis of function call to
> +   DECL.  */
> +
> +struct ao_function_info
> +{
> +  int num_param_reads;  /* Number of parameters function reads from,
> +-1 if reads are unknown.  */
> +  struct ao_access_info
> +{
> +  char param;/* Index of parameter read/written from.  */
> +  char size_param;   /* Index of parameter specifying size of the access,
> +-1 if unknown.  */
> +  char size; /* Size of access if known, 0 if unknown.  */
> +} reads[2];
> +  int num_param_writes;
> +  struct ao_access_info writes[2];
> +  enum ao_function_flags flags;
> +};
> +
>  /* In tree-ssa-alias.c  */
>  extern void ao_ref_init (ao_ref *, tree);
>  extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
> @@ -158,6 +185,7 @@ extern void debug (pt_solution *ptr);
>  extern void dump_points_to_info_for (FILE *, tree);
>  extern void debug_points_to_info_for (tree);
>  extern void dump_alias_stats (FILE *);
> +extern bool ao_classify_builtin (tree callee, ao_function_info *info);
>  
>  
>  /* In tree-ssa-structalias.c  */
> diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
> index fe390d4ffbe..c182e7bb39c 100644
> --- a/gcc/tree-ssa-alias.c
> +++ b/gcc/tree-ssa-alias.c
> @@ -2503,6 +2503,507 @@ modref_may_conflict (const gimple *stmt,
>return false;
>  }
>  
> +/* If CALLEE has known side effects, fill in INFO and return true.
> +   See tree-ssa-structalias.c:find_func_aliases
> +   for the list of builtins we might need to handle here.  */
> +
> +bool
> +ao_classify_builtin (tree callee, ao_function_info *info)
> +{
> +  built_in_function code = DECL_FUNCTION_CODE (callee);
> +
> +  switch (code)
> +{
> +  /* All the following functions read memory pointed to by
> +  their second argument and write memory pointed to by first
> +  argument.
> +  strcat/strncat additionally reads memory pointed to by the first
> +  argument.  */
> +  case BUILT_IN_STRCAT:
> + {
> +   const static struct ao_function_info ret_info
> += {
> + 2,  /* num_param_reads.  */
> +
> + /* Reads and write descriptors are triples containing:
> +- index of parameter read
> +- index of parameter specifying access size
> +  (-1 if unknown)
> +- access size in bytes (0 if unknown).  */
> +
> + {{0, -1, 0}, {1, -1, 0}},   /* Param read.  */
> + 1,  /* num_param_writes.  */
> + {{0, -1, 0}},   /* Param written.  */
> + (ao_function_flags)0,   /* flags.  */
> +  };
> +   *info = ret_info;
> +   return true;
> + }
> +  case BUILT_IN_STRNCAT:
> + {
> +   const static struct ao_function_info ret_info
> += {
> + 2,  /* num_param_reads.  */
> + {{0, -1, 0}, {1, 2, 0}},/* Param read.  */
> + 1,  /* num_param_writes.  

Re: [PATCH v2 2/2] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-09-28 Thread Richard Biener via Gcc-patches
On Sat, Sep 26, 2020 at 12:41 AM Segher Boessenkool
 wrote:
>
> On Fri, Sep 25, 2020 at 08:58:35AM +0200, Richard Biener wrote:
> > On Thu, Sep 24, 2020 at 9:38 PM Segher Boessenkool
> >  wrote:
> > > after which I get (-march=znver2)
> > >
> > > setg:
> > > vmovd   %edi, %xmm1
> > > vmovd   %esi, %xmm2
> > > vpbroadcastd%xmm1, %ymm1
> > > vpbroadcastd%xmm2, %ymm2
> > > vpcmpeqd.LC0(%rip), %ymm1, %ymm1
> > > vpandn  %ymm0, %ymm1, %ymm0
> > > vpand   %ymm2, %ymm1, %ymm1
> > > vpor%ymm0, %ymm1, %ymm0
> > > ret
> >
> > I get with -march=znver2 -O2
> >
> > vmovd   %edi, %xmm1
> > vmovd   %esi, %xmm2
> > vpbroadcastd%xmm1, %ymm1
> > vpbroadcastd%xmm2, %ymm2
> > vpcmpeqd.LC0(%rip), %ymm1, %ymm1
> > vpblendvb   %ymm1, %ymm2, %ymm0, %ymm0
>
> Ah, maybe my x86 compiler is too old...
>   x86_64-linux-gcc (GCC) 10.0.0 20190919 (experimental)
> not exactly old, huh.  I wonder what I'm doing wrong then.
>
> > Now, with SSE4.2 the 16byte case compiles to
> >
> > setg:
> > .LFB0:
> > .cfi_startproc
> > movd%edi, %xmm3
> > movdqa  %xmm0, %xmm1
> > movd%esi, %xmm4
> > pshufd  $0, %xmm3, %xmm0
> > pcmpeqd .LC0(%rip), %xmm0
> > movdqa  %xmm0, %xmm2
> > pandn   %xmm1, %xmm2
> > pshufd  $0, %xmm4, %xmm1
> > pand%xmm1, %xmm0
> > por %xmm2, %xmm0
> > ret
> >
> > since there's no blend with a variable mask IIRC.
>
> PowerPC got at least *that* right since time immemorial :-)
>
> > with aarch64 and SVE it doesn't handle the 32byte case at all,
> > the 16byte case compiles to
> >
> > setg:
> > .LFB0:
> > .cfi_startproc
> > adrpx2, .LC0
> > dup v1.4s, w0
> > dup v2.4s, w1
> > ldr q3, [x2, #:lo12:.LC0]
> > cmeqv1.4s, v1.4s, v3.4s
> > bit v0.16b, v2.16b, v1.16b
> >
> > which looks equivalent to the AVX2 code.
>
> Yes, and we can do pretty much the same on Power, too.
>
> > For all of those varying the vector element type may also
> > cause "issues" I guess.
>
> For us, as long as it stays 16B vectors, all should be fine.  There may
> be issues in the compiler, but at least the hardware has no problem with
> it ;-)
>
> > > and for powerpc (changing it to 16B vectors, -mcpu=power9) it is
> > >
> > > setg:
> > > addis 9,2,.LC0@toc@ha
> > > mtvsrws 32,5
> > > mtvsrws 33,6
> > > addi 9,9,.LC0@toc@l
> > > lxv 45,0(9)
> > > vcmpequw 0,0,13
> > > xxsel 34,34,33,32
> > > blr
>
> The -mcpu=power10 code right now is just
>
> plxv 45,.LC0@pcrel
> mtvsrws 32,5
> mtvsrws 33,6
> vcmpequw 0,0,13
> xxsel 34,34,33,32
> blr
>
> (exactly the same, but less memory address setup cost), so doing
> something like this as a generic version would work quite well pretty
> much everywhere I think!

Given we don't have a good way to query for variable blend support,
the only change would be to use the bit and/or way, which would
probably be fine (and eventually be combined).  I guess that could be
done in ISEL as well (given support for generating the mask, of
course).

Of course that makes it difficult for targets to opt out (generate
the in-memory variant)...

Richard.

>
> Segher
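
For context, the setg function being compiled above corresponds to
something like this (a reconstruction from the thread, not a quoted
testcase); the and/or fallback mentioned is sketched in the comment:

  typedef int v8si __attribute__ ((vector_size (32)));

  /* Insert val at a runtime-variable index: the expansions above
     broadcast idx and val, compare the broadcast idx against the
     constant vector {0,1,...,7} (.LC0), and select under the mask.
     Without a variable blend this becomes (v & ~mask) | (val & mask).  */
  v8si
  setg (v8si v, int idx, int val)
  {
    v[idx] = val;
    return v;
  }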


Re: [PATCH] tree-optimization/97151 - improve PTA for C++ operator delete

2020-09-28 Thread Richard Biener
On Fri, 25 Sep 2020, Jason Merrill wrote:

> On 9/25/20 2:30 AM, Richard Biener wrote:
> > On Thu, 24 Sep 2020, Jason Merrill wrote:
> > 
> >> On 9/24/20 3:43 AM, Richard Biener wrote:
> >>> On Wed, 23 Sep 2020, Jason Merrill wrote:
> >>>
>  On 9/23/20 2:42 PM, Richard Biener wrote:
> > On September 23, 2020 7:53:18 PM GMT+02:00, Jason Merrill
> > 
> > wrote:
> >> On 9/23/20 4:14 AM, Richard Biener wrote:
> >>> C++ operator delete, when DECL_IS_REPLACEABLE_OPERATOR_DELETE_P,
> >>> does not cause the deleted object to be escaped.  It also has no
> >>> other interesting side-effects for PTA so skip it like we do
> >>> for BUILT_IN_FREE.
> >>
> >> Hmm, this is true of the default implementation, but since the function
> >>
> >> is replaceable, we don't know what a user definition might do with the
> >> pointer.
> >
> > But can the object still be 'used' after delete? Can delete fail /
> > throw?
> >
> > What guarantee does the predicate give us?
> 
>  The deallocation function is called as part of a delete expression in
>  order
>  to
>  release the storage for an object, ending its lifetime (if it was not
>  ended
>  by
>  a destructor), so no, the object can't be used afterward.
> >>>
> >>> OK, but the delete operator can access the object contents if there
> >>> wasn't a destructor ...
> >>
>  A deallocation function that throws has undefined behavior.
> >>>
> >>> OK, so it seems the 'replaceable' operators are the global ones
> >>> (for user-defined/class-specific placement variants I see arbitrary
> >>> extra arguments that we'd possibly need to handle).
> >>>
> >>> I'm happy to revert but I'd like to have a testcase that FAILs
> >>> with the patch ;)
> >>>
> >>> Now, the following aborts:
> >>>
> >>> struct X {
> >>> static struct X saved;
> >>> int *p;
> >>> X() { __builtin_memcpy (this, &saved, sizeof (X)); }
> >>> };
> >>> void operator delete (void *p)
> >>> {
> >>> __builtin_memcpy (&X::saved, p, sizeof (X));
> >>> }
> >>> int main()
> >>> {
> >>> int y = 1;
> >>> X *p = new X;
> >>> p->p = &y;
> >>> delete p;
> >>> X *q = new X;
> >>> *(q->p) = 2;
> >>> if (y != 2)
> >>>   __builtin_abort ();
> >>> }
> >>>
> >>> and I could fix this by not making *p but what *p points to escape.
> >>> The testcase is of course maximally awkward, but hey ... ;)
> >>>
> >>> Now this would all be moot if operator delete may not access
> >>> the object (or if the object contents are undefined at that point).
> >>>
> >>> Oh, and the testcase segfaults when compiled with GCC 10 because
> >>> there we elide the new X / delete p pair ... which is invalid then?
> >>> Hmm, we emit
> >>>
> >>> MEM[(struct X *)_8] ={v} {CLOBBER};
> >>> operator delete (_8, 8);
> >>>
> >>> so the object contents are undefined _before_ calling delete
> >>> even when I do not have a DTOR?  That is, the above,
> >>> w/o -fno-lifetime-dse, makes the PTA patch OK for the testcase.
> >>
> >> Yes, all classes have a destructor, even if it's trivial, so the object's
> >> lifetime definitely ends before the call to operator delete. This is less
> >> clear for scalar objects, but treating them similarly would be consistent
> >> with
> >> other recent changes, so I think it's fine for us to assume that scalar
> >> objects are also invalidated before the call to operator delete.  But of
> >> course this doesn't apply to explicit calls to operator delete outside of a
> >> delete expression.
> > 
> > OK, so change the testcase main slightly to
> > 
> > int main()
> > {
> >int y = 1;
> >X *p = new X;
> >p->p = &y;
> >::operator delete(p);
> >X *q = new X;
> >*(q->p) = 2;
> >if (y != 2)
> >  __builtin_abort ();
> > }
> > 
> > in this case the lifetime of *p does not end before calling
> > ::operator delete() and delete can stash the object contents
> > somewhere before ending its lifetime.  For the very same reason
> > we may not elide a new/delete pair like in
> > 
> > int main()
> > {
> >int *p = new int;
> >*p = 1;
> >::operator delete (p);
> > }
> 
> Correct; the permission to elide new/delete pairs is for the expressions, not
> the functions.
> 
> > which we before the change did not do only because calling
> > operator delete made p escape.  Unfortunately points-to analysis
> > cannot really reconstruct whether delete was called as part of
> > a delete expression or directly (and thus whether object lifetime
> > ended already), neither can DCE.  So I guess we need to mark
> > the operator delete call in some way to make those transforms
> > safe.  At least currently any operator delete call makes the
> > alias guarantee of an operator new call moot by forcing the object
> > to be aliased with all global and escaped memory ...
> > 
> > Looks like there are some unallocated flags for CALL_EXPR we could
> > pick but I wonder if we can recycle protected_flag 

[Ada] Add missing end location information

2020-09-28 Thread Eric Botcazou
In some cases we would fail to put the end location information on the 
outermost BIND_EXPR of a function, which is problematic when there is a 
dynamic stack allocation.

Tested on x86_64-suse-linux, applied on the mainline.  This makes the 
following tweak to lower_try_finally_dup_block obsolete:
  https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553951.html
so I reverted it.


2020-09-28  Eric Botcazou  

* gcc-interface/trans.c (Subprogram_Body_to_gnu): Set the end locus
of body and declaration earlier.

-- 
Eric Botcazou

diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
index 3491451cc3d..f03d591a323 100644
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -4017,6 +4017,11 @@ Subprogram_Body_to_gnu (Node_Id gnat_node)
   gnat_poplevel ();
   gnu_result = end_stmt_group ();
 
+  /* Attempt setting the end_locus of our GCC body tree, typically a BIND_EXPR,
+ then the end_locus of our GCC subprogram declaration tree.  */
+  set_end_locus_from_node (gnu_result, gnat_node);
+  set_end_locus_from_node (gnu_subprog_decl, gnat_node);
+
   /* If we populated the parameter attributes cache, we need to make sure that
  the cached expressions are evaluated on all the possible paths leading to
  their uses.  So we force their evaluation on entry of the function.  */
@@ -4111,12 +4116,6 @@ Subprogram_Body_to_gnu (Node_Id gnat_node)
 
   gnu_return_label_stack->pop ();
 
-  /* Attempt setting the end_locus of our GCC body tree, typically a
- BIND_EXPR or STATEMENT_LIST, then the end_locus of our GCC subprogram
- declaration tree.  */
-  set_end_locus_from_node (gnu_result, gnat_node);
-  set_end_locus_from_node (gnu_subprog_decl, gnat_node);
-
   /* On SEH targets, install an exception handler around the main entry
  point to catch unhandled exceptions.  */
   if (DECL_NAME (gnu_subprog_decl) == main_identifier_node


[Ada] Fix bogus alignment warning on address clause

2020-09-28 Thread Eric Botcazou
This is a regression present on the mainline and 10 branch: the compiler gives 
a bogus alignment warning on an address clause and a discriminated record type 
with variable size.

Tested on x86_64-suse-linux, applied on the mainline and 10 branch.


2020-09-28  Eric Botcazou  

* gcc-interface/decl.c (maybe_saturate_size): Add ALIGN parameter
and round down the result to ALIGN.
(gnat_to_gnu_entity): Adjust calls to maybe_saturate_size.


2020-09-28  Eric Botcazou  

* gnat.dg/addr16.adb: New test.
* gnat.dg/addr16_pkg.ads: New helper.

-- 
Eric Botcazou

diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
index c9c2a95170f..cd0a50b2083 100644
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -232,7 +232,7 @@ static tree build_position_list (tree, bool, tree, tree, unsigned int, tree);
 static vec build_subst_list (Entity_Id, Entity_Id, bool);
 static vec build_variant_list (tree, Node_Id, vec,
 	 vec);
-static tree maybe_saturate_size (tree);
+static tree maybe_saturate_size (tree, unsigned int align);
 static tree validate_size (Uint, tree, Entity_Id, enum tree_code, bool, bool,
 			   const char *, const char *);
 static void set_rm_size (Uint, tree, Entity_Id);
@@ -4425,7 +4425,12 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	  /* If the size is self-referential, annotate the maximum value
 	 after saturating it, if need be, to avoid a No_Uint value.  */
 	  if (CONTAINS_PLACEHOLDER_P (gnu_size))
-	gnu_size = maybe_saturate_size (max_size (gnu_size, true));
+	{
+	  const unsigned int align
+		= UI_To_Int (Alignment (gnat_entity)) * BITS_PER_UNIT;
+	  gnu_size
+		= maybe_saturate_size (max_size (gnu_size, true), align);
+	}
 
 	  /* If we are just annotating types and the type is tagged, the tag
 	 and the parent components are not generated by the front-end so
@@ -4461,7 +4466,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 		  gnu_size = size_binop (PLUS_EXPR, gnu_size, offset);
 		}
 
-	  gnu_size = maybe_saturate_size (round_up (gnu_size, align));
+	  gnu_size
+		= maybe_saturate_size (round_up (gnu_size, align), align);
 	  Set_Esize (gnat_entity, annotate_value (gnu_size));
 
 	  /* Tagged types are Strict_Alignment so RM_Size = Esize.  */
@@ -8946,15 +8952,21 @@ build_variant_list (tree gnu_qual_union_type, Node_Id gnat_variant_part,
 }
 
 /* If SIZE has overflowed, return the maximum valid size, which is the upper
-   bound of the signed sizetype in bits; otherwise return SIZE unmodified.  */
+   bound of the signed sizetype in bits, rounded down to ALIGN.  Otherwise
+   return SIZE unmodified.  */
 
 static tree
-maybe_saturate_size (tree size)
+maybe_saturate_size (tree size, unsigned int align)
 {
   if (TREE_CODE (size) == INTEGER_CST && TREE_OVERFLOW (size))
-size = size_binop (MULT_EXPR,
-		   fold_convert (bitsizetype, TYPE_MAX_VALUE (ssizetype)),
-		   build_int_cst (bitsizetype, BITS_PER_UNIT));
+{
+  size
+	= size_binop (MULT_EXPR,
+		  fold_convert (bitsizetype, TYPE_MAX_VALUE (ssizetype)),
+		  build_int_cst (bitsizetype, BITS_PER_UNIT));
+  size = round_down (size, align);
+}
+
   return size;
 }
 
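As a worked example of the new saturation (assuming a 64-bit ssizetype
and BITS_PER_UNIT == 8): the saturated size in bits is
(2^63 - 1) * 8 = 2^66 - 8, which is not a multiple of a 32-bit
alignment; round_down turns it into 2^66 - 32, so the annotated maximum
size stays a multiple of the alignment, which is presumably what the
bogus warning tripped over before.
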
-- { dg-do compile }

with Aggr16_Pkg; use Aggr16_Pkg;

package body Aggr16 is

  type Arr is array (1 .. 4) of Time;

  type Change_Type is (One, Two, Three);

  type Change (D : Change_Type) is record
case D is
  when Three =>
A : Arr;
  when Others =>
B : Boolean;
end case;
  end record;

  procedure Proc is
C : Change (Three);
  begin
C.A := (others => Null_Time);
  end;

end Aggr16;
package Aggr16_Pkg is

  type Time_Type is (A, B);

  type Time (D : Time_Type := A) is private;

  Null_Time : constant Time;

private

  type Hour is record
I1 : Integer;
I2 : Integer;
  end record;

  type Time (D : Time_Type := A) is record
case D is
  when A =>
A_Time : Integer;
  when B =>
B_Time : Hour;
end case;
  end record;

  Null_Time : constant Time := (A, 0);

end Aggr16_Pkg;