[PATCH 4/4] libstdc++: Rearrange some range adaptors' data members

2020-09-27 Thread Patrick Palka via Gcc-patches
Since the standard range adaptors are specified to derive from the empty
class view_base, making their first data member store the underlying
view is suboptimal, for if the underlying view also derives from
view_base then the two view_base subobjects will be adjacent, thus
preventing the compiler from applying the empty base optimization to
elide away the storage for these two empty bases.

This patch improves the situation by declaring the _M_base data member
last instead of first in each range adaptor that has more than one data
member, so that the empty base optimization can apply more often.

Tested on x86_64-pc-linux-gnu with and without -m32.

libstdc++-v3/ChangeLog:

* include/std/ranges (filter_view::_M_base): Declare this data
member last.
(transform_view::_M_base): Likewise.
(take_view::_M_base): Likewise.
(take_while_view::_M_base): Likewise.
(drop_view::_M_base): Likewise.
(drop_while_view::_M_base): Likewise.
(join_view::_M_base): Likewise.
(split_view::_M_base): Likewise.
* testsuite/std/ranges/adaptors/sizeof.cc: Adjust expected
sizes.
---
 libstdc++-v3/include/std/ranges| 17 -
 .../testsuite/std/ranges/adaptors/sizeof.cc| 18 +-
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 964a2b616a6..6fd8a85c2bf 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1250,9 +1250,9 @@ namespace views
{ return __y.__equal(__x); }
   };
 
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
   [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
+  _Vp _M_base = _Vp();
 
 public:
   filter_view() = default;
@@ -1588,8 +1588,8 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Fp> _M_fun;
+  _Vp _M_base = _Vp();
 
 public:
   transform_view() = default;
@@ -1695,8 +1695,8 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
   range_difference_t<_Vp> _M_count = 0;
+  _Vp _M_base = _Vp();
 
 public:
   take_view() = default;
@@ -1842,8 +1842,8 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
+  _Vp _M_base = _Vp();
 
 public:
   take_while_view() = default;
@@ -1902,8 +1902,8 @@ namespace views
 class drop_view : public view_interface>
 {
 private:
-  _Vp _M_base = _Vp();
   range_difference_t<_Vp> _M_count = 0;
+  _Vp _M_base = _Vp();
 
   // ranges::next(begin(base), count, end(base)) is O(1) if _Vp satisfies
   // both random_access_range and sized_range. Otherwise, cache its result.
@@ -2002,9 +2002,9 @@ namespace views
 class drop_while_view : public view_interface>
 {
 private:
-  _Vp _M_base = _Vp();
   [[no_unique_address]] __detail::__box<_Pred> _M_pred;
   [[no_unique_address]] __detail::_CachedPosition<_Vp> _M_cached_begin;
+  _Vp _M_base = _Vp();
 
 public:
   drop_while_view() = default;
@@ -2300,12 +2300,11 @@ namespace views
  friend _Sentinel;
};
 
-  _Vp _M_base = _Vp();
-
   // XXX: _M_inner is "present only when !is_reference_v<_InnerRange>"
   [[no_unique_address]]
__detail::__maybe_present_t,
views::all_t<_InnerRange>> _M_inner;
+  _Vp _M_base = _Vp();
 
 public:
   join_view() = default;
@@ -2680,8 +2679,8 @@ namespace views
  { ranges::iter_swap(__x._M_i_current(), __y._M_i_current()); }
};
 
-  _Vp _M_base = _Vp();
   _Pattern _M_pattern = _Pattern();
+  _Vp _M_base = _Vp();
 
   // XXX: _M_current is "present only if !forward_range"
   [[no_unique_address]]
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
index 5fb1ab7e4da..a7f622bb725 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
@@ -33,17 +33,17 @@ using V = ranges::subrange;
 constexpr auto ptr = sizeof(int*);
 static_assert(sizeof(V) == 2*ptr);
 
-static_assert(sizeof(ranges::take_view) == 4*ptr);
-static_assert(sizeof(ranges::drop_view) == 4*ptr);
+static_assert(sizeof(ranges::take_view) == 3*ptr);
+static_assert(sizeof(ranges::drop_view) == 3*ptr);
 
-static_assert(sizeof(ranges::filter_view) == 5*ptr);
-static_assert(sizeof(ranges::take_while_view) == 4*ptr);
-static_assert(sizeof(ranges::drop_while_view) == 5*ptr);
-static_assert(sizeof(ranges::transform_view) == 4*ptr);
+static_assert(sizeof(ranges::filter_view) == 4*ptr);
+static_assert(sizeof(ranges::take_while_view) == 3*ptr);

[PATCH 3/4] libstdc++: Add test that tracks range adaptors' sizes

2020-09-27 Thread Patrick Palka via Gcc-patches
libstdc++-v3/ChangeLog:

* testsuite/std/ranges/adaptors/sizeof.cc: New test.
---
 .../testsuite/std/ranges/adaptors/sizeof.cc   | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc

diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
new file mode 100644
index 000..5fb1ab7e4da
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/sizeof.cc
@@ -0,0 +1,49 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+
+#include 
+#include 
+
+namespace ranges = std::ranges;
+
+auto pred_f(int x) { return x%2 == 0; };
+auto pred_l = [] (int x) { return x%2 == 0; };
+
+auto func_f(int x) { return x*x; }
+auto func_l = [] (int x) { return x*x; };
+
+using V = ranges::subrange;
+constexpr auto ptr = sizeof(int*);
+static_assert(sizeof(V) == 2*ptr);
+
+static_assert(sizeof(ranges::take_view) == 4*ptr);
+static_assert(sizeof(ranges::drop_view) == 4*ptr);
+
+static_assert(sizeof(ranges::filter_view) == 5*ptr);
+static_assert(sizeof(ranges::take_while_view) == 4*ptr);
+static_assert(sizeof(ranges::drop_while_view) == 5*ptr);
+static_assert(sizeof(ranges::transform_view) == 4*ptr);
+
+static_assert(sizeof(ranges::filter_view) == 4*ptr);
+static_assert(sizeof(ranges::take_while_view) == 3*ptr);
+static_assert(sizeof(ranges::drop_while_view) == 4*ptr);
+static_assert(sizeof(ranges::transform_view) == 3*ptr);
+
+static_assert(sizeof(ranges::split_view) == 5*ptr);
-- 
2.28.0.618.g9bc233ae1c



[PATCH 2/4] libstdc++: Reduce the size of a subrange with empty sentinel type

2020-09-27 Thread Patrick Palka via Gcc-patches
libstdc++-v3/ChangeLog:

* include/bits/ranges_util.h (subrange::_M_end): Give it
[[no_unique_address]].
* testsuite/std/ranges/subrange/sizeof.cc: New test.
---
 libstdc++-v3/include/bits/ranges_util.h   |  2 +-
 .../testsuite/std/ranges/subrange/sizeof.cc   | 28 +++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc

diff --git a/libstdc++-v3/include/bits/ranges_util.h 
b/libstdc++-v3/include/bits/ranges_util.h
index 694ae796399..a98658ff5c8 100644
--- a/libstdc++-v3/include/bits/ranges_util.h
+++ b/libstdc++-v3/include/bits/ranges_util.h
@@ -208,7 +208,7 @@ namespace ranges
= _Kind == subrange_kind::sized && !sized_sentinel_for<_Sent, _It>;
 
   _It _M_begin = _It();
-  _Sent _M_end = _Sent();
+  [[no_unique_address]] _Sent _M_end = _Sent();
 
   template
struct _Size
diff --git a/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc 
b/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc
new file mode 100644
index 000..7e95e196c6c
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/subrange/sizeof.cc
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+
+#include 
+#include 
+
+// Verify we optimize away the 'end' data member of a subrange with an empty
+// sentinel type.
+static_assert(sizeof(std::ranges::subrange,
+  std::default_sentinel_t>)
+ == sizeof(std::counted_iterator));
-- 
2.28.0.618.g9bc233ae1c



[PATCH 1/4] libstdc++: Reduce the size of an unbounded iota_view

2020-09-27 Thread Patrick Palka via Gcc-patches
libstdc++-v3/ChangeLog:

* include/std/ranges (iota_view::_M_bound): Give it
[[no_unique_address]].
* testsuite/std/ranges/iota/iota_view.cc: Check that an
unbounded iota_view has minimal size.
---
 libstdc++-v3/include/std/ranges | 2 +-
 libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index ed04fa0001d..964a2b616a6 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -511,7 +511,7 @@ namespace ranges
   };
 
   _Winc _M_value = _Winc();
-  _Bound _M_bound = _Bound();
+  [[no_unique_address]] _Bound _M_bound = _Bound();
 
 public:
   iota_view() = default;
diff --git a/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc 
b/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
index 65d166fbd3b..8a33e10a093 100644
--- a/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
+++ b/libstdc++-v3/testsuite/std/ranges/iota/iota_view.cc
@@ -77,6 +77,9 @@ test04()
   VERIFY( it == v.end() );
 }
 
+// Verify we optimize away the 'bound' data member of an unbounded iota_view.
+static_assert(sizeof(std::ranges::iota_view) == 1);
+
 int
 main()
 {
-- 
2.28.0.618.g9bc233ae1c



Re: [PATCH] Put absolute address jump table in data.rel.ro.local if targets support relocations

2020-09-27 Thread HAO CHEN GUI via Gcc-patches

Hi,

Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553809.html

Thanks

Gui Haochen

On 14/9/2020 上午 11:01, HAO CHEN GUI wrote:

Hi,

  Jump tables are put into text or rodata section originally. On some 
platforms, it gains the performance benefit from absolute address jump 
tables. So I want to let absolute address jump table be relocatable.  
This patch puts absolute jump table in read only relocation section if 
the target supports relocations.


    /* Judge if it's a absolute jump table. Set relocatable for
           absolute jump table if the target supports relocations.  */

        if (!CASE_VECTOR_PC_RELATIVE
            && !targetm.asm_out.generate_pic_addr_diff_vec ())
           relocatable = targetm.asm_out.reloc_rw_mask ();

        switch_to_section (targetm.asm_out.function_rodata_section

                                      (current_function_decl, 
relocatable));


The attachments are the patch diff file and change log file.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.  
Is this okay for trunk? Any recommendations? Thanks a lot.




Re: [PATCH, rs6000] Add non-relative jump table support on Power Linux

2020-09-27 Thread HAO CHEN GUI via Gcc-patches

Segher,

    Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-September/553486.html
Thanks
Gui Haochen

On 9/9/2020 下午 4:55, HAO CHEN GUI wrote:

Hi Segher,

    Thanks for your advice. I removed macros defined in linux64.h and 
linux.h. So they take relative jump tables by default. When 
no-relative-jumptables is set, the absolute jump tables are taken. All 
things relevant to section relocations are put in another patch. 
Thanks again.



On 8/9/2020 上午 5:46, Segher Boessenkool wrote:

On Mon, Aug 24, 2020 at 03:48:43PM +0800, HAO CHEN GUI wrote:
I'll try to be quicker at reviewing iterations of this -- there is 
quite

some way to go, without me slowing things down!

Sigh :-(


* config/rs6000/linux.h (rs6000_relative_jumptables): Define.

That macro looks like it is variable (or function).  *Make* it a
variable, please?


* config/rs6000/rs6000.c (TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC):
Define

Period?

(rs6000_gen_pic_addr_diff_vec, rs6000_output_addr_vec_elt): 
Implement.

"New function."


* config/rs6000/rs6000.md (absolute_tablejumpsi,
absolute_tablejumpsi_nospec, absolute_tablejumpdi,
absolute_tablejumpdi_nospec): Add four new expansions.

"New define_expands." or "New expanders."

* config/rs6000/rs6000.opt (mrelative-jumptables): Add a new 
option and

set rs6000_relative_jumptables to true by default.

"rs6000.opt: Add -mrelative-jumptables."


+/* Disable relative jump tables for Power Linux.  */
+#undef rs6000_relative_jumptables
+#define rs6000_relative_jumptables 0

Why?


+/* Disable relative jump tables for Power Linux64.  */
+#undef rs6000_relative_jumptables
+#define rs6000_relative_jumptables 0

(That's not what it's called...  Just don't say the "for..." at all?
It is clear from what file it is in.)


  /* Indicate that jump tables go in the text section.  */
  #undef  JUMP_TABLES_IN_TEXT_SECTION
-#define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT
+#define JUMP_TABLES_IN_TEXT_SECTION rs6000_relative_jumptables

Not sure that is correct.  Maybe the patch using rodata (.data.rel.ro)
should be a separate patch?


  /* Define as C expression which evaluates to nonzero if the tablejump
 instruction expects the table to contain offsets from the 
address of the

 table.
 Do not define this if the table should contain absolute 
addresses.  */

-#define CASE_VECTOR_PC_RELATIVE 1
+#define CASE_VECTOR_PC_RELATIVE 0

This should depend on the new flag?


+/* Specify the machine mode that this machine uses
+   for the index in the tablejump instruction.  */
+#define CASE_VECTOR_MODE \
+  (TARGET_32BIT || rs6000_relative_jumptables ? SImode : DImode)

rs6000_relative_jumptables ? SImode : Pmode;


+  if (rs6000_relative_jumptables)
+    {
+  if (TARGET_32BIT)
+    emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
+  else
+    emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
+    }

Hrm, I guess we should make that a parameterized name (future work,
don't do it now :-) )


+(define_expand "absolute_tablejumpsi"

Don't prefix names; it should start with "tablejump".


Segher
* config/rs6000/rs6000-protos.h (rs6000_output_addr_vec_elt): Declare.
* config/rs6000/rs6000.c (TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC):
Define.
(rs6000_gen_pic_addr_diff_vec, rs6000_output_addr_vec_elt): Implement.
* config/rs6000/rs6000.h (CASE_VECTOR_PC_RELATIVE,
CASE_VECTOR_MODE, ASM_OUTPUT_ADDR_VEC_ELT): Define.
* config/rs6000/rs6000.md (tablejumpsi_absolute,
tablejumpsi_nospec_absolute, tablejumpdi_absolute,
tablejumpdi_nospec_absolute): New expanders.
* config/rs6000/rs6000.opt (mrelative-jumptables): Add
mrelative-jumptables.
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 5508484ba19..62564dd67f2 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -155,6 +155,8 @@ extern void rs6000_split_logical (rtx [], enum rtx_code, 
bool, bool, bool);
 extern bool rs6000_pcrel_p (struct function *);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 
+extern void rs6000_output_addr_vec_elt (FILE *, int);
+
 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
formats are not currently used by GCC.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 58f5d780603..94d1e650b94 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1369,6 +1369,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
 
+#undef  TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
+#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
+
 #undef TARGET_LEGITIMIZE_ADDRESS
 #define 

Add handling of builtins to ipa-modref

2020-09-27 Thread Jan Hubicka
Hi,
this patch implements basic builtins handling to ipa-modref.
It breaks three additional Fortran testcases due to Fortran frontend
TBAA bugs as discussed in
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554936.html

Otherwise it bootstraps and regtests x86_64-linux.  With cc1plus I now
get:


Alias oracle query stats:
  refs_may_alias_p: 63538841 disambiguations, 73864959 queries
  ref_maybe_used_by_call_p: 142582 disambiguations, 64434151 queries
  call_may_clobber_ref_p: 23601 disambiguations, 30140 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 38132 queries
  nonoverlapping_refs_since_match_p: 19438 disambiguations, 55708 must 
overlaps, 75971 queries
  aliasing_component_refs_p: 55152 disambiguations, 755956 queries
  TBAA oracle: 25191597 disambiguations 58468366 queries
   16860532 are in alias set 0
   10457818 queries asked about the same object
   125 queries asked about the same alias set
   0 access volatile
   4033743 are dependent in the DAG
   1924551 are aritificially in conflict with void *

Modref stats:
  modref use: 12085 disambiguations, 56415 queries
  modref clobber: 1613560 disambiguations, 2391667 queries
  6026445 tbaa queries (2.519768 per modref query)
  433921 base compares (0.181430 per modref query)

PTA query stats:
  pt_solution_includes: 982378 disambiguations, 13627506 queries
  pt_solutions_intersect: 1029380 disambiguations, 13198579 queries


This is 15% more use disambiguations and 38% clobber disambiguations
compared to previous build in
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554930.html

Good part of clobber disambiguation is probably due to the bugfix
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554937.html
but that should not affect uses and thus there is certainly noticeable
improvement for builtin handling.

I get no noticeable difference for tramp3d presumably because most
allocations are on-stack or via C++ new.

gcc/ChangeLog:

2020-09-28  Jan Hubicka  

* ipa-modref.c: Include tree-ssa-alias.h.
(modref_summary::dump): Dump writes_errno.
(parm_map_for): Break out from...
(merge_call_side_effects): ... here.
(analyze_call): Handle builtins.
(analyze_function): Initialize writes_errno.
(modref_summaries::duplicate): Copy writes_errno.
(modref_write): Stream writes_errno.
(read_section): Stream writes_errno.
(compute_parm_map): Be ready for missing callee info.
(ipa_merge_modref_summary_after_inlining): Skip stores for pure
functions.
(collapse_stores): New function.
(modref_propagate_in_scc): Handle builtins.
* ipa-modref.h (modref_summary): Add writes_errno.
* tree-ssa-alias.c (call_may_clobber_ref_p_1): Check for errno.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 6225552e41a..bba9f0db71d 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -59,6 +59,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "value-range.h"
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
+#include "tree-ssa-alias.h"
 
 /* Class (from which there is one global instance) that holds modref summaries
for all analyzed functions.  */
@@ -257,6 +258,8 @@ modref_summary::dump (FILE *out)
   fprintf (out, "  LTO stores:\n");
   dump_lto_records (stores_lto, out);
 }
+  if (writes_errno)
+fprintf (out, "  Writes errno\n");
 }
 
 
@@ -437,6 +440,35 @@ ignore_stores_p (tree caller, int flags)
   return false;
 }
 
+/* Return parm map value for OP.
+   This means returning nonnegative value if OP is function parameter,
+   -2 is OP points to local or readonly memory and -1 otherwise.  */
+static int
+parm_map_for (tree op)
+{
+  if (TREE_CODE (op) == SSA_NAME
+  && SSA_NAME_IS_DEFAULT_DEF (op)
+  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
+{
+  int index = 0;
+  for (tree t = DECL_ARGUMENTS (current_function_decl);
+  t != SSA_NAME_VAR (op); t = DECL_CHAIN (t))
+   {
+ if (!t)
+   {
+ index = -1;
+ break;
+   }
+ index++;
+   }
+  return index;
+}
+  else if (points_to_local_or_readonly_memory_p (op))
+return -2;
+  else
+return -1;
+}
+
 /* Merge side effects of call STMT to function with CALLEE_SUMMARY
int CUR_SUMMARY.  Return true if something changed.
If IGNORE_STORES is true, do not merge stores.  */
@@ -451,31 +483,7 @@ merge_call_side_effects (modref_summary *cur_summary,
 
   parm_map.safe_grow (gimple_call_num_args (stmt));
   for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
-{
-  tree op = gimple_call_arg (stmt, i);
-  STRIP_NOPS (op);
-  if (TREE_CODE (op) == SSA_NAME
- && SSA_NAME_IS_DEFAULT_DEF (op)
- && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
-   {
- int index = 0;
- for (tree t = DECL_ARGUMENTS 

Export info about side effects of builtins out of tree-ssa-alias.c

2020-09-27 Thread Jan Hubicka
Hi,
ipa-reference, ipa-pure-const and ipa-modref could use the knowledge
about builtins which is currently hardwired into
ref_maybe_used_by_call_p_1, call_may_clobber_ref_p_1 and the PTA
computation.  This patch breaks out logic implemented in the first two
into a form of a simple descriptor that can be used by the IPA passes
(and other code).

I was considering an option of putting this into def file but I do not think
it is feasible without cluttering it quite a lot.

For ipa-modref I implemented dump informing about missing builtins. strlen,
sqrt and exp seems common offenders, but that can be handled incrementally
if the approach looks reasonable.
I would also look into adding the description for PTA (perhaps with some
special cases remaining since it is more ad-hoc)

Bootstrapped/regtested x86_64-linux, OK?

gcc/ChangeLog:

2020-09-28  Jan Hubicka  

* tree-ssa-alias.c (ao_classify_builtin): New function commonizing
logic from ...
(ref_maybe_used_by_call_p_1): ... here.
(call_may_clobber_ref_p_1): ... and here.
* tree-ssa-alias.h (enum ao_function_flags): New enum.
(struct ao_function_info): New structure.
(ao_classify_builtin): Declare.

diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 1dd02c0ea62..eecb8da6dd7 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -108,6 +108,33 @@ ao_ref::max_size_known_p () const
   return known_size_p (max_size);
 }
 
+/* Flags used in ao_function_info.  */
+
+enum ao_function_flags
+{
+  AO_FUNCTION_BARRIER = 1,
+  AO_FUNCTION_ERRNO = 2,
+};
+
+/* Describe side effects relevant for alias analysis of function call to
+   DECL.  */
+
+struct ao_function_info
+{
+  int num_param_reads;  /* Number of parameters function reads from,
+  -1 if reads are unknown.  */
+  struct ao_access_info
+{
+  char param;  /* Index of parameter read/written from.  */
+  char size_param; /* Index of parameter specifying size of the access,
+  -1 if unknown.  */
+  char size;   /* Size of access if known, 0 if unknown.  */
+} reads[2];
+  int num_param_writes;
+  struct ao_access_info writes[2];
+  enum ao_function_flags flags;
+};
+
 /* In tree-ssa-alias.c  */
 extern void ao_ref_init (ao_ref *, tree);
 extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
@@ -158,6 +185,7 @@ extern void debug (pt_solution *ptr);
 extern void dump_points_to_info_for (FILE *, tree);
 extern void debug_points_to_info_for (tree);
 extern void dump_alias_stats (FILE *);
+extern bool ao_classify_builtin (tree callee, ao_function_info *info);
 
 
 /* In tree-ssa-structalias.c  */
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index fe390d4ffbe..c182e7bb39c 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -2503,6 +2503,507 @@ modref_may_conflict (const gimple *stmt,
   return false;
 }
 
+/* If CALLEE has known side effects, fill in INFO and return true.
+   See tree-ssa-structalias.c:find_func_aliases
+   for the list of builtins we might need to handle here.  */
+
+bool
+ao_classify_builtin (tree callee, ao_function_info *info)
+{
+  built_in_function code = DECL_FUNCTION_CODE (callee);
+
+  switch (code)
+{
+  /* All the following functions read memory pointed to by
+their second argument and write memory pointed to by first
+argument.
+strcat/strncat additionally reads memory pointed to by the first
+argument.  */
+  case BUILT_IN_STRCAT:
+   {
+ const static struct ao_function_info ret_info
+  = {
+   2,  /* num_param_reads.  */
+
+   /* Reads and write descriptors are triples containing:
+  - index of parameter read
+  - index of parameter specifying access size
+(-1 if unknown)
+  - access size in bytes (0 if unkown).  */
+
+   {{0, -1, 0}, {1, -1, 0}},   /* Param read.  */
+   1,  /* num_param_writes.  */
+   {{0, -1, 0}},   /* Param written.  */
+   (ao_function_flags)0,   /* flags.  */
+};
+ *info = ret_info;
+ return true;
+   }
+  case BUILT_IN_STRNCAT:
+   {
+ const static struct ao_function_info ret_info
+  = {
+   2,  /* num_param_reads.  */
+   {{0, -1, 0}, {1, 2, 0}},/* Param read.  */
+   1,  /* num_param_writes.  */
+   {{0, -1, 0}},   /* Param written.  */
+   (ao_function_flags)0,   /* flags.  */
+};
+ *info = ret_info;
+ return true;
+   }
+  case BUILT_IN_STRCPY:
+  case BUILT_IN_STPCPY:
+   {
+ const static struct ao_function_info 

Fix handling of stores in modref_summary::useful_p

2020-09-27 Thread Jan Hubicka
Hi,
this patch fixes a pasto in modref_summary::useful_p that made
ipa-modref to give up on tracking stores when all load info got lost.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

2020-09-27  Jan Hubicka  

* ipa-modref.c (modref_summary::useful_p): Fix testing of stores.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 728c6c1523d..6225552e41a 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -135,7 +135,7 @@ modref_summary::useful_p (int ecf_flags)
 return true;
   if (ecf_flags & ECF_PURE)
 return false;
-  return stores && !loads->every_base;
+  return stores && !stores->every_base;
 }
 
 /* Dump A to OUT.  */


Re: New modref/ipa_modref optimization passes

2020-09-27 Thread Jan Hubicka
> On 9/21/20 10:10 AM, Richard Biener wrote:
> 
> > > I see, so you would expect call to alsize to initialize things in
> > > array15_unkonwn type?  That would work too.
> > Yes, that's my expectation.  But let's see what fortran folks say.
> 
> RFC patch attached; I think the following should work, but I am not
> sure whether I missed something.
> 
> I wonder what to do about
>   '!GCC$ NO_ARG_CHECK :: x
> but that seems to work fine (creates void* type) and as it only
> permits assumed size or scalar variables, the descriptor issue
> does not occur.
> 
> Thoughts?

Hi,
with somewhat improved ipa-modref and your patch I get the following
failures:
FAIL: gfortran.dg/assumed_type_2.f90   -O   scan-tree-dump-times original 
"sub_array_assumed ((struct t1.0:. .) parm" 1
FAIL: gfortran.dg/assumed_type_9.f90   -O2  execution test
FAIL: gfortran.dg/assumed_type_9.f90   -Os  execution test
FAIL: gfortran.dg/class_allocate_20.f90   -O2  execution test
FAIL: gfortran.dg/class_allocate_20.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/class_allocate_20.f90   -O3 -g  execution test
FAIL: gfortran.dg/class_allocate_20.f90   -Os  execution test
FAIL: gfortran.dg/finalize_25.f90   -O2  execution test
FAIL: gfortran.dg/finalize_25.f90   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/finalize_25.f90   -O3 -g  execution test
FAIL: gfortran.dg/finalize_25.f90   -Os  execution test
FAIL: gfortran.dg/no_arg_check_2.f90   -O   scan-tree-dump-times original 
"sub_array_assumed ((struct t1.0:. .) parm" 1
WARNING: gfortran.dg/pdt_14.f03   -O2  execution test program timed out.
FAIL: gfortran.dg/pdt_14.f03   -O2  execution test
WARNING: gfortran.dg/pdt_14.f03   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  execution test
program timed out.
FAIL: gfortran.dg/pdt_14.f03   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  execution test
WARNING: gfortran.dg/pdt_14.f03   -O3 -g  execution test program timed out.
FAIL: gfortran.dg/pdt_14.f03   -O3 -g  execution test
WARNING: gfortran.dg/pdt_14.f03   -Os  execution test program timed out.
FAIL: gfortran.dg/pdt_14.f03   -Os  execution test
FAIL: gfortran.dg/sizeof_4.f90   -O0  execution test
FAIL: gfortran.dg/sizeof_4.f90   -O1  execution test
FAIL: gfortran.dg/sizeof_4.f90   -O2  execution test
FAIL: gfortran.dg/sizeof_4.f90   -O3 -fomit-frame-pointer -funroll-loops
-fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/sizeof_4.f90   -O3 -g  execution test
FAIL: gfortran.dg/sizeof_4.f90   -Os  execution test

With assumed_type_9.f90 we get:
__final_test_T4 (struct array15_t4 & restrict array, integer(kind=8) 
byte_stride, logical(kind=1) fini_coarray)

called as:

struct array00_t1 decl_uidesc.19

__final_test_T4 (&desc.19, 24, 0);

and we optimize out initializer of desc.19 since it is TBAA
incompatible (so same problem as with assumed type but this time the
consumer descriptor is not universal; just different).


With finalize_25 I see:

__final_gn_Sl (struct array15_sl & restrict array, integer(kind=8) byte_stride, 
logical(kind=1) fini_coarray)

called as:

struct array00_sl desc.20
...
__final_gn_Sl (&desc.20, 64, 0);


With pdt_14.f03 I get disambiguation
ipa-modref: in main/8, call to push_8/6 does not clobber 
__vtab_link_module_Pdtlink_8._deallocate 14->13
so this seems different and I am not quite sure what is wrong here.

FAIL: gfortran.dg/sizeof_4.f90   -O1  execution test

actually goes away with reverting your patch.

Honza
> 
> Tobias
> 

> gcc/fortran/ChangeLog:
> 
>   * trans-array.c (gfc_conv_expr_descriptor):
>   (gfc_conv_array_parameter):
>   * trans-array.h (gfc_conv_expr_descriptor):
> 
>  gcc/fortran/trans-array.c | 15 +--
>  gcc/fortran/trans-array.h |  3 ++-
>  2 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
> index 6566c47d4ae..a5d1b477a0a 100644
> --- a/gcc/fortran/trans-array.c
> +++ b/gcc/fortran/trans-array.c
> @@ -7216,7 +7216,7 @@ walk_coarray (gfc_expr *e)
> function call.  */
>  
>  void
> -gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr)
> +gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr, bool want_assumed_type)
>  {
>gfc_ss *ss;
>gfc_ss_type ss_type;
> @@ -7611,7 +7611,9 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr)
>else
>   {
> /* Otherwise make a new one.  */
> -   if (expr->ts.type == BT_CHARACTER && expr->ts.deferred)
> +   if (want_assumed_type)
> + parmtype = ptr_type_node;
> +   else if (expr->ts.type == BT_CHARACTER && expr->ts.deferred)
>   parmtype = gfc_typenode_for_spec (&expr->ts);
> else
>   parmtype = gfc_get_element_type (TREE_TYPE (desc));
> @@ -7950,7 +7952,8 @@ gfc_conv_array_parameter (gfc_se * se, 

Re: [committed] libstdc++: Use __libc_single_threaded to optimise atomics [PR 96817]

2020-09-27 Thread Florian Weimer
* Jonathan Wakely via Libstdc:

> We can't use __libc_single_threaded to replace __gthread_active_p
> everywhere. If we replaced the uses of __gthread_active_p in std::mutex
> then we would elide the pthread_mutex_lock in the code below, but not
> the pthread_mutex_unlock:
>
>   std::mutex m;
>   m.lock();// pthread_mutex_lock
>   std::thread t([]{}); // __libc_single_threaded = false
>   t.join();
>   m.unlock();  // pthread_mutex_unlock

Thanks for implementing this.

Eliding the mutex lock is a bit iffy because the mutex may reside in a
shared mapping.  For doing the same optimization in glibc, we will
have to check if the mutex is process-private or not.


Re: [PATCH, rs6000] correct an erroneous BTM value in the BU_P10_MISC define

2020-09-27 Thread Bill Schmidt via Gcc-patches

On 9/25/20 6:50 PM, Segher Boessenkool wrote:

On Fri, Sep 25, 2020 at 03:34:49PM -0500, will schmidt wrote:

On Fri, 2020-09-25 at 12:36 -0500, Segher Boessenkool wrote:

No, it cannot.

This is used for pdepd/pextd/cntlzdm/cnttzdm/cfuged, all of which do
need 64-bit registers to do anything sane.

This should really have defined some new builtin class, and I thought we
could just be tricky and take a massive shortcut.  Bill has been hit by
this already as well, sigh :-(

Ok.

The usage of that macro seems to be limited to those that you have
referenced.  i.e.

/* Builtins for scalar instructions added in ISA 3.1 (power10).  */
BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
BU_P10_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
BU_P10_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
BU_P10_MISC_2 (PEXTD, "pextd", CONST, pextd)

So looking at the power7 entries that have the BTM_POWERPC64 entry..

BU_P7_MISC_2 (DIVWE,"divwe",  CONST,  dive_si)
BU_P7_MISC_2 (DIVWEU,   "divweu", CONST,  diveu_si)
BU_P7_POWERPC64_MISC_2 (DIVDE,  "divde",  CONST,  dive_di)
BU_P7_POWERPC64_MISC_2 (DIVDEU, "divdeu", CONST,  diveu_di)

Would it be suitable to rename the P10 macro to
BU_P10_POWERPC64_MISC_2 ?

Yes.  But that requires some more infrastructure I thought...  Maybe not
though?  And we can do that anyway of course, it's not like we do not
have way way way too much there already.


I'd then debate whether to add an unused macro to fill the gap between
BU_P10_MISC_1 and BU_P10_MISC_2

Nah, don't bother, those are just names, the numbers are meaningless :-)


If you've got schemes for a deeper fix, I'd need another hint. :-)

Talk with Bill if this makes things easier for him / harder / no
difference?


What Will has in mind is what I would prefer.  I identified this as a 
naming problem above all else.  The only issue for me is that I nearly 
missed it when converting things to use the new builtin methodology, 
because it wasn't made obvious by the naming.


Thanks,

Bill



Thanks,


Segher


[PATCH] x86: Use SET operation in MOVDIRI and MOVDIR64B

2020-09-27 Thread H.J. Lu via Gcc-patches
Since MOVDIRI and MOVDIR64B write to memory, similar to UNSPEC_MOVNT,
use SET operation in MOVDIRI and MOVDIR64B patterns with UNSPEC instead
of UNSPECV.

gcc/

PR target/97184
* config/i386/i386.md (UNSPECV_MOVDIRI): Renamed to ...
(UNSPEC_MOVDIRI): This.
(UNSPECV_MOVDIR64B): Renamed to ...
(UNSPEC_MOVDIR64B): This.
(movdiri): Use SET operation.
(@movdir64b_<mode>): Likewise.

gcc/testsuite/

PR target/97184
* gcc.target/i386/movdir64b.c: New test.
* gcc.target/i386/movdiri32.c: Likewise.
* gcc.target/i386/movdiri64.c: Likewise.
* testsuite/lib/target-supports.exp
(check_effective_target_movdir): New.
---
 gcc/config/i386/i386.md   | 20 ++--
 gcc/testsuite/gcc.target/i386/movdir64b.c | 23 +++
 gcc/testsuite/gcc.target/i386/movdiri32.c | 20 
 gcc/testsuite/gcc.target/i386/movdiri64.c | 20 
 gcc/testsuite/lib/target-supports.exp | 17 +
 5 files changed, 90 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/movdir64b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/movdiri32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/movdiri64.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 93aae8123fd..9dd12cf8643 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -186,6 +186,10 @@ (define_c_enum "unspec" [
 
   ;; IRET support
   UNSPEC_INTERRUPT_RETURN
+
+  ;; For MOVDIRI and MOVDIR64B support
+  UNSPEC_MOVDIRI
+  UNSPEC_MOVDIR64B
 ])
 
 (define_c_enum "unspecv" [
@@ -280,10 +284,6 @@ (define_c_enum "unspecv" [
   UNSPECV_SETSSBSY
   UNSPECV_CLRSSBSY
 
-  ;; For MOVDIRI and MOVDIR64B support
-  UNSPECV_MOVDIRI
-  UNSPECV_MOVDIR64B
-
   ;; For TSXLDTRK support
   UNSPECV_XSUSLDTRK
   UNSPECV_XRESLDTRK
@@ -21531,17 +21531,17 @@ (define_insn "wbnoinvd"
 ;; MOVDIRI and MOVDIR64B
 
 (define_insn "movdiri"
-  [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m")
-  (match_operand:SWI48 1 "register_operand" "r")]
- UNSPECV_MOVDIRI)]
+  [(set (match_operand:SWI48 0 "memory_operand" "=m")
+   (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
+ UNSPEC_MOVDIRI))]
   "TARGET_MOVDIRI"
   "movdiri\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
 (define_insn "@movdir64b_<mode>"
-  [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
-   (match_operand:XI 1 "memory_operand")]
-  UNSPECV_MOVDIR64B)]
+  [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
+   (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
+  UNSPEC_MOVDIR64B))]
   "TARGET_MOVDIR64B"
   "movdir64b\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
diff --git a/gcc/testsuite/gcc.target/i386/movdir64b.c 
b/gcc/testsuite/gcc.target/i386/movdir64b.c
new file mode 100644
index 000..70d872ec5d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/movdir64b.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target movdir } } */
+/* { dg-options "-mmovdir64b -O2" } */
+
+#include <x86intrin.h>
+#include <string.h>
+
+unsigned long long int src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+unsigned long long int dest[8] __attribute__ ((aligned (64)))
+  = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+int
+main ()
+{
+  if (!__builtin_cpu_supports ("movdir64b"))
+return 0;
+
+  _movdir64b (dest, src);
+
+  if (memcmp (dest, src, sizeof (dest)) != 0)
+abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/movdiri32.c 
b/gcc/testsuite/gcc.target/i386/movdiri32.c
new file mode 100644
index 000..604fa0dd9e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/movdiri32.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target movdir } } */
+/* { dg-options "-mmovdiri -O2" } */
+
+#include <x86intrin.h>
+
+unsigned int dest = -1;
+
+int
+main ()
+{
+  if (!__builtin_cpu_supports ("movdiri"))
+return 0;
+
+  _directstoreu_u32 (&dest, 0xbadbeef);
+
+  if (dest != 0xbadbeef)
+abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/movdiri64.c 
b/gcc/testsuite/gcc.target/i386/movdiri64.c
new file mode 100644
index 000..551b98e8d08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/movdiri64.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { movdir && { ! ia32 } } } } */
+/* { dg-options "-mmovdiri -O2" } */
+
+#include <x86intrin.h>
+
+unsigned long long int dest = -1LL;
+
+int
+main ()
+{
+  if (!__builtin_cpu_supports ("movdiri"))
+return 0;
+
+  _directstoreu_u64 (&dest, 0x12345678badbeef);
+
+  if (dest != 0x12345678badbeef)
+abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 0a00972edb5..2ccb4e61bc9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -10519,3 +10519,20 @@ proc check_effective_target_ident_directive {} {
int i;