Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c

2019-05-07 Thread Alan Modra
This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with
the fixes Segher requested, plus a few more:
- delete PREFERRED_RELOAD_CLASS changes
- adjust for recent register renumbering
- use defines rather than hard coding register numbers
- flip altivec/float test when dealing with moves within vsx regs,
  so that the altivec hard reg count is preferred over the fp hard reg
  count when both reg types are possible.
- use 2 for power9 direct move cost, and remove more '?'s from insns.
- use reg_class_subset_p in the test for slow LR/CTR moves

Bootstrapped and regression tested powerpc64le-linux.  OK for mainline?

PR target/89271
* config/rs6000/rs6000.h (enum reg_class, REG_CLASS_NAMES),
(REG_CLASS_CONTENTS): Add GEN_OR_VSX_REGS class.
* config/rs6000/rs6000.c (rs6000_register_move_cost): Correct
cost for general <-> vsx when direct moves are available.
Cost union classes at minimal cost for any reg in the class.
Correct calculation for moves between vsx, float, and altivec.
Don't return a low cost for moves between special regs.  Don't
use hard coded register numbers.
(TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define.
(rs6000_ira_change_pseudo_allocno_class): New function.
* config/rs6000/rs6000.md (movsi_internal1, mov<mode>_internal),
(movdi_internal32, movdi_internal64): Remove '*' from vsx register
alternatives.
(movsi_internal1): Don't disparage vector alternatives.
(mov<mode>_internal): Likewise, excepting alternative that
will be split.
* config/rs6000/vsx.md (vsx_splat_<mode>_reg): Don't disparage
we <- b alternative.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5d5765d89b2..e7c63c263ae 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1729,6 +1729,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
+  rs6000_ira_change_pseudo_allocno_class
 #undef TARGET_CANNOT_COPY_INSN_P
 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
 #undef TARGET_RTX_COSTS
@@ -34648,22 +34651,54 @@ rs6000_register_move_cost (machine_mode mode,
   reg_class_t from, reg_class_t to)
 {
   int ret;
+  reg_class_t rclass;
 
   if (TARGET_DEBUG_COST)
 dbg_cost_ctrl++;
 
+  /* If we have VSX, we can easily move between FPR or Altivec registers,
+ otherwise we can only easily move within classes.
+ Do this first so we give best-case answers for union classes
+ containing both gprs and vsx regs.  */
+  HARD_REG_SET to_vsx, from_vsx;
+  COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
+  AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
+  COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
+  AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
+  if (!hard_reg_set_empty_p (to_vsx)
+  && !hard_reg_set_empty_p (from_vsx)
+  && (TARGET_VSX
+ || hard_reg_set_intersect_p (to_vsx, from_vsx)))
+{
+  int reg = FIRST_FPR_REGNO;
+  if (TARGET_VSX
+ || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
+ && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
+   reg = FIRST_ALTIVEC_REGNO;
+  ret = 2 * hard_regno_nregs (reg, mode);
+}
+
   /*  Moves from/to GENERAL_REGS.  */
-  if (reg_classes_intersect_p (to, GENERAL_REGS)
-  || reg_classes_intersect_p (from, GENERAL_REGS))
+  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
+  || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
 {
-  reg_class_t rclass = from;
-
-  if (! reg_classes_intersect_p (to, GENERAL_REGS))
-   rclass = to;
-
   if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
-   ret = (rs6000_memory_move_cost (mode, rclass, false)
-  + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+   {
+ if (TARGET_DIRECT_MOVE)
+   {
+ if (rs6000_tune != PROCESSOR_POWER9)
+   ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+ else
+   ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+ /* SFmode requires a conversion when moving between gprs
+and vsx.  */
+ if (mode == SFmode)
+   ret += 2;
+   }
+ else
+   ret = (rs6000_memory_move_cost (mode, rclass, false)
+  + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+   }
 
   /* It's more expensive to move CR_REGS than CR0_REGS because of the
 shift.  */
@@ -34676,24 +34711,14 @@ rs6000_register_move_cost (machine_mode mode,
|| rs6000_tune == 

libgo patch committed: Handle direct interface type receiver in Value.call

2019-05-07 Thread Ian Lance Taylor
This libgo patch by Cherry Zhang fixes the reflect package to
correctly handle direct interface typed receiver in Value.call.  A
direct interface type's value method takes a value receiver now.
Don't pass a pointer to the method function.  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 270999)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-e3ba8828baf60343316bb68002e94570ee63ad1e
+fcbf847c3bf76fb475c9020e1c57057134407263
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/reflect/value.go
===
--- libgo/go/reflect/value.go   (revision 270877)
+++ libgo/go/reflect/value.go   (working copy)
@@ -401,7 +401,7 @@ func (v Value) call(op string, in []Valu
if v.flag != 0 {
nin++
}
-   firstPointer := len(in) > 0 && t.In(0).Kind() != Ptr && 
v.flag != 0
+   firstPointer := len(in) > 0 && ifaceIndir(t.In(0).common()) && 
v.flag != 0
params := make([]unsafe.Pointer, nin)
off := 0
if v.flag != 0 {


Go patch committed: Handle value method of direct interface type

2019-05-07 Thread Ian Lance Taylor
This patch to the Go frontend by Cherry Zhang avoids using a double
pointer for the value method of a direct interface type.  For a direct
interface type T with a value method M, its pointer type (*T)'s method
table includes a stub method of M which takes a (*T) as the receiver
instead of a T.  However, for the "typ" field of the method table
entry, we added another layer of indirection, which makes it appear to
take a **T, which is wrong.  This causes problems when using
reflect.Type.Method to get the method.  This CL fixes the second,
incorrect, indirection.  A test case for this can be found in
https://golang.org/cl/175880.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 270993)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-dc9c1b43753f392fdc2045bcb7a4abaa44fe79f1
+e3ba8828baf60343316bb68002e94570ee63ad1e
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/types.cc
===
--- gcc/go/gofrontend/types.cc  (revision 270877)
+++ gcc/go/gofrontend/types.cc  (working copy)
@@ -3440,14 +3440,15 @@ Type::method_constructor(Gogo*, Type* me
   vals->push_back(Expression::make_unary(OPERATOR_AND, s, bloc));
 }
 
-  Named_object* no =
-((this->points_to() != NULL
-  && this->points_to()->is_direct_iface_type()
-  && m->is_value_method())
- ? m->iface_stub_object()
- : (m->needs_stub_method()
-? m->stub_object()
-: m->named_object()));
+  bool use_direct_iface_stub =
+this->points_to() != NULL
+&& this->points_to()->is_direct_iface_type()
+&& m->is_value_method();
+  Named_object* no = (use_direct_iface_stub
+  ? m->iface_stub_object()
+  : (m->needs_stub_method()
+ ? m->stub_object()
+ : m->named_object()));
 
   Function_type* mtype;
   if (no->is_function())
@@ -3463,7 +3464,8 @@ Type::method_constructor(Gogo*, Type* me
 
   ++p;
   go_assert(p->is_field_name("typ"));
-  bool want_pointer_receiver = !only_value_methods && m->is_value_method();
+  bool want_pointer_receiver = (!only_value_methods && m->is_value_method()
+&& !use_direct_iface_stub);
   nonmethod_type = mtype->copy_with_receiver_as_param(want_pointer_receiver);
   vals->push_back(Expression::make_type_descriptor(nonmethod_type, bloc));
 


Re: [Patch] Fix ix86_expand_sse_comi_round (PR Target/89750, PR Target/86444)

2019-05-07 Thread Hongtao Liu
Any other comments? Otherwise I'll merge to trunk.

On Tue, May 7, 2019 at 3:31 PM Hongtao Liu  wrote:
>
> On Tue, May 7, 2019 at 3:03 PM Jakub Jelinek  wrote:
> >
> > On Tue, May 07, 2019 at 01:38:49PM +0800, Hongtao Liu wrote:
> > > +2019-05-06  H.J. Lu  
> > > + Hongtao Liu  
> > > +
> > > + PR Target/89750
> > > + PR Target/86444
> >
> > target, not Target.  Various people handle these in various scripts,
> > so it is better to use consistency and exact spelling of the categories.
> >
> > Jakub
>
> Ok, Thank you for your reminding.
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao


Re: Enable BF16 support (Please ignore my former email)

2019-05-07 Thread Hongtao Liu
On Wed, May 8, 2019 at 2:33 AM Uros Bizjak  wrote:
>
> On Tue, May 7, 2019 at 8:49 AM Hongtao Liu  wrote:
>
> > > > > > > > > > This patch is about to enable support for bfloat16 
> > > > > > > > > > which will be in Future Cooper Lake, Please refer to 
> > > > > > > > > > https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
> > > > > > > > > > for more details about BF16.
> > > > > > > > > >
> > > > > > > > > > There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, 
> > > > > > > > > > VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector 
> > > > > > > > > > Neural Network Instructions supporting:
> > > > > > > > > >
> > > > > > > > > > -   VCVTNE2PS2BF16: Convert Two Packed Single Data to 
> > > > > > > > > > One Packed BF16 Data.
> > > > > > > > > > -   VCVTNEPS2BF16: Convert Packed Single Data to Packed 
> > > > > > > > > > BF16 Data.
> > > > > > > > > > -   VDPBF16PS: Dot Product of BF16 Pairs Accumulated 
> > > > > > > > > > into Packed Single Precision.
> > > > > > > > > >
> > > > > > > > > > Since only BF16 intrinsics are supported, we treat it as HI 
> > > > > > > > > > for simplicity.
> > > > > > > > >
> > > > > > > > > I think it was a mistake declaring cvtps2ph and cvtph2ps 
> > > > > > > > > using HImode
> > > > > > > > > instead of HFmode. Is there a compelling reason not to 
> > > > > > > > > introduce
> > > > > > > > > corresponding bf16_format supporting infrastructure and 
> > > > > > > > > declare these
> > > > > > > > > intrinsics using half-binary (HBmode ?) mode instead?
> > > > > > > > >
> > > > > > > > > Uros.
> > > > > > > >
> > > > > > > > Bfloat16 isn't IEEE standard which we want to reserve HFmode 
> > > > > > > > for.
> > > > > > >
> > > > > > > True.
> > > > > > >
> > > > > > > > The IEEE 754 standard specifies a binary16 as having the 
> > > > > > > > following format:
> > > > > > > > Sign bit: 1 bit
> > > > > > > > Exponent width: 5 bits
> > > > > > > > Significand precision: 11 bits (10 explicitly stored)
> > > > > > > >
> > > > > > > > Bfloat16 has the following format:
> > > > > > > > Sign bit: 1 bit
> > > > > > > > Exponent width: 8 bits
> > > > > > > > Significand precision: 8 bits (7 explicitly stored), as opposed 
> > > > > > > > to 24
> > > > > > > > bits in a classical single-precision floating-point format
> > > > > > >
> > > > > > > This is why I proposed to introduce HBmode (and corresponding
> > > > > > > > bfloat16_format) to distinguish between IEEE HFmode and BFmode.
> > > > > > >
> > > > > >
> > > > > > Unless there is BF16 language level support,  HBmode has no 
> > > > > > advantage
> > > > > > over HImode.   We can add HBmode when we gain BF16 language support.
> > > > > >
> > > > > > --
> > > > > > H.J.
> > > > >
> > > > > Any other comments, I'll merge this to trunk?
> > > >
> > > > It is not a regression, so please no.
> > >
> > > Ehm, "regression fix" ...
> > >
> > > Uros.
> >
> > Update patch.
>
> Index: gcc/config/i386/i386-builtins.c
> ===
> --- gcc/config/i386/i386-builtins.c(revision 270934)
> +++ gcc/config/i386/i386-builtins.c(working copy)
> @@ -1920,6 +1920,7 @@
>F_VPCLMULQDQ,
>F_AVX512VNNI,
>F_AVX512BITALG,
> +  F_AVX512BF16,
>F_MAX
>  };
>
> @@ -2064,7 +2065,8 @@
>{"gfni",F_GFNI,P_ZERO},
>{"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
>{"avx512vnni", F_AVX512VNNI, P_ZERO},
> -  {"avx512bitalg", F_AVX512BITALG, P_ZERO}
> +  {"avx512bitalg", F_AVX512BITALG, P_ZERO},
> +  {"avx512bf16", F_AVX512BF16, P_ZERO}
>  };
>
>  /* This parses the attribute arguments to target in DECL and determines
>
> You also need to update cpuinfo.h and cpuinfo.c in libgcc/config/i386
> with avx512bf16, plus relevant test files.
>
> Index: gcc/testsuite/gcc.target/i386/avx-1.c
> Index: gcc/testsuite/gcc.target/i386/avx-2.c
>
> No need to update above two files, sse-*.c changes are enough to cover
> new functionality.
>
> Otherwise LGTM, but please repost updated patch with the ChangeLog
> entry (please see [1]).
>
> [1] https://www.gnu.org/software/gcc/contribute.html#patches
>
> Uros.

Update patch:
1. Add Changelog.
2. Update libgcc part.

-- 
BR,
Hongtao
Index: gcc/ChangeLog
===
--- gcc/ChangeLog	(revision 270997)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,36 @@
+2019-05-07  Wei Xiao  
+	* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET
+	OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
+	(OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
+	(ix86_handle_option): Handle -mavx512bf16.
+	* config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
+	to extra_headers.
+	* config/i386/avx512bf16vlintrin.h: New.
+	* config/i386/avx512bf16intrin.h: New.
+	* config/i386/cpuid.h (bit_AVX512BF16): New.
+	* config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
+	* 

Go patch committed: Add option to emit optimization information

2019-05-07 Thread Ian Lance Taylor
This patch to the Go frontend by Cherry Zhang adds a
-fgo-debug-optimization option to emit optimization diagnostics. This
can be used for testing optimizations. Apply this to the range clear
optimizations of maps and arrays.  Bootstrapped and ran Go testsuite
on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

gcc/go:

2019-05-07  Cherry Zhang  

* lang.opt (-fgo-debug-optimization): New option.
* go-c.h (struct go_create_gogo_args): Add debug_optimization
field.
* go-lang.c (go_langhook_init): Set debug_optimization field.
* gccgo.texi (Invoking gccgo): Document -fgo-debug-optimization.

gcc/testsuite:

2019-05-07  Cherry Zhang  

* go.dg/arrayclear.go: New test.
* go.dg/mapclear.go: New test.
Index: gcc/go/gccgo.texi
===
--- gcc/go/gccgo.texi   (revision 270877)
+++ gcc/go/gccgo.texi   (working copy)
@@ -246,6 +246,11 @@ This runs escape analysis only on functi
 that match the given suffix @var{n}.  This can be used to binary
 search across functions to uncover escape analysis bugs.
 
+@item -fgo-debug-optimization
+@cindex @option{-fgo-debug-optimization}
+@cindex @option{-fno-go-debug-optimization}
+Output optimization diagnostics.
+
 @item -fgo-c-header=@var{file}
 @cindex @option{-fgo-c-header}
 Write top-level named Go struct definitions to @var{file} as C code.
Index: gcc/go/go-c.h
===
--- gcc/go/go-c.h   (revision 270877)
+++ gcc/go/go-c.h   (working copy)
@@ -49,6 +49,7 @@ struct go_create_gogo_args
   int debug_escape_level;
   const char* debug_escape_hash;
   int64_t nil_check_size_threshold;
+  bool debug_optimization;
 };
 
 extern void go_create_gogo (const struct go_create_gogo_args*);
Index: gcc/go/go-lang.c
===
--- gcc/go/go-lang.c(revision 270877)
+++ gcc/go/go-lang.c(working copy)
@@ -118,6 +118,7 @@ go_langhook_init (void)
   args.debug_escape_level = go_debug_escape_level;
   args.debug_escape_hash = go_debug_escape_hash;
   args.nil_check_size_threshold = TARGET_AIX ? -1 : 4096;
+  args.debug_optimization = go_debug_optimization;
   args.linemap = go_get_linemap();
   args.backend = go_get_backend();
   go_create_gogo ();
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 270877)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-4b3015de639cf22ed11ff96097555700909827c8
+dc9c1b43753f392fdc2045bcb7a4abaa44fe79f1
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/go.cc
===
--- gcc/go/gofrontend/go.cc (revision 270877)
+++ gcc/go/gofrontend/go.cc (working copy)
@@ -44,6 +44,8 @@ go_create_gogo(const struct go_create_go
   if (args->debug_escape_hash != NULL)
 ::gogo->set_debug_escape_hash(args->debug_escape_hash);
   ::gogo->set_nil_check_size_threshold(args->nil_check_size_threshold);
+  if (args->debug_optimization)
+::gogo->set_debug_optimization(args->debug_optimization);
 }
 
 // Parse the input files.
Index: gcc/go/gofrontend/gogo.cc
===
--- gcc/go/gofrontend/gogo.cc   (revision 270877)
+++ gcc/go/gofrontend/gogo.cc   (working copy)
@@ -55,6 +55,7 @@ Gogo::Gogo(Backend* backend, Linemap* li
 check_divide_overflow_(true),
 compiling_runtime_(false),
 debug_escape_level_(0),
+debug_optimization_(false),
 nil_check_size_threshold_(4096),
 verify_types_(),
 interface_types_(),
Index: gcc/go/gofrontend/gogo.h
===
--- gcc/go/gofrontend/gogo.h(revision 270877)
+++ gcc/go/gofrontend/gogo.h(working copy)
@@ -326,6 +326,16 @@ class Gogo
   set_debug_escape_hash(const std::string& s)
   { this->debug_escape_hash_ = s; }
 
+  // Return whether to output optimization diagnostics.
+  bool
+  debug_optimization() const
+  { return this->debug_optimization_; }
+
+  // Set the option to output optimization diagnostics.
+  void
+  set_debug_optimization(bool b)
+  { this->debug_optimization_ = b; }
+
   // Return the size threshold used to determine whether to issue
   // a nil-check for a given pointer dereference. A threshold of -1
   // implies that all potentially faulting dereference ops should
@@ -1075,6 +1085,9 @@ class Gogo
   // -fgo-debug-escape-hash option. The analysis is run only on
   // functions with names that hash to the matching value.
   std::string debug_escape_hash_;
+  // Whether to output optimization diagnostics, from the
+  // -fgo-debug-optimization option.
+  bool debug_optimization_;
   // Nil-check size threshhold.
   int64_t nil_check_size_threshold_;
   // A list of types to verify.
Index: gcc/go/gofrontend/statements.cc

[PATCH] Improve API docs Filesystem TS and Networking TS

2019-05-07 Thread Jonathan Wakely

More Doxygenation.

Tested powerpc64le-linux. Committed to trunk.


commit 4fc9ce4407954cf6601e80685c30bb3b7318f025
Author: Jonathan Wakely 
Date:   Tue May 7 23:58:33 2019 +0100

Improve API docs for Filesystem TS and Networking TS

* include/experimental/bits/fs_path.h: Improve docs.
* include/experimental/bits/net.h: Fix wrong header name in comment.
Do not document implementation details.
* include/experimental/netfwd: Fix doxygen grouping.

diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h b/libstdc++-v3/include/experimental/bits/fs_path.h
index fafc9930a5b..f81f33ca161 100644
--- a/libstdc++-v3/include/experimental/bits/fs_path.h
+++ b/libstdc++-v3/include/experimental/bits/fs_path.h
@@ -514,8 +514,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
   /// @relates std::experimental::filesystem::path @{
 
+  /// Swap overload for paths
   inline void swap(path& __lhs, path& __rhs) noexcept { __lhs.swap(__rhs); }
 
+  /// Compute a hash value for a path
   size_t hash_value(const path& __p) noexcept;
 
   /// Compare paths
diff --git a/libstdc++-v3/include/experimental/bits/net.h b/libstdc++-v3/include/experimental/bits/net.h
index d5ef277b933..1d637fb9b83 100644
--- a/libstdc++-v3/include/experimental/bits/net.h
+++ b/libstdc++-v3/include/experimental/bits/net.h
@@ -24,7 +24,7 @@
 
 /** @file experimental/bits/net.h
  *  This is an internal header file, included by other library headers.
- *  Do not attempt to use it directly. @headername{experimental/networking}
+ *  Do not attempt to use it directly. @headername{experimental/net}
  */
 
 #ifndef _GLIBCXX_EXPERIMENTAL_NET_H
@@ -55,6 +55,8 @@ inline namespace v1
   template
 class async_result;
 
+  /// @cond undocumented
+
   // A type denoted by DEDUCED in the TS.
   template
 using __deduced_t = typename
@@ -67,25 +69,27 @@ inline namespace v1
   is_constructible<_Tp, _Tp&>, is_constructible<_Tp, const _Tp&&>
   >::type;
 
-struct __throw_on_error
+  struct __throw_on_error
+  {
+explicit
+__throw_on_error(const char* __msg) : _M_msg(__msg) { }
+
+~__throw_on_error() noexcept(false)
 {
-  explicit
-  __throw_on_error(const char* __msg) : _M_msg(__msg) { }
+  if (_M_ec)
+	_GLIBCXX_THROW_OR_ABORT(system_error(_M_ec, _M_msg));
+}
 
-  ~__throw_on_error() noexcept(false)
-  {
-	if (_M_ec)
-	  _GLIBCXX_THROW_OR_ABORT(system_error(_M_ec, _M_msg));
-  }
+__throw_on_error(const __throw_on_error&) = delete;
+__throw_on_error& operator=(const __throw_on_error&) = delete;
 
-  __throw_on_error(const __throw_on_error&) = delete;
-  __throw_on_error& operator=(const __throw_on_error&) = delete;
+operator error_code&() noexcept { return _M_ec; }
 
-  operator error_code&() noexcept { return _M_ec; }
+const char* _M_msg;
+error_code _M_ec;
+  };
 
-  const char* _M_msg;
-  error_code _M_ec;
-};
+  /// @endcond
 
   // Base class for types meeting IntegerSocketOption requirements.
   template
diff --git a/libstdc++-v3/include/experimental/netfwd b/libstdc++-v3/include/experimental/netfwd
index f8f4479b6c8..6a7ac4ae281 100644
--- a/libstdc++-v3/include/experimental/netfwd
+++ b/libstdc++-v3/include/experimental/netfwd
@@ -96,7 +96,7 @@ inline namespace v1
 namespace ip
 {
   /**
-   * @ingroup networking-ts
+   * @addtogroup networking-ts
* @{
*/
 class address;


[PATCH] Improve API docs for std::pair

2019-05-07 Thread Jonathan Wakely

More Doxygenation.

Tested powerpc64le-linux. Committed to trunk.


commit 591cd4fa016959cad3665719b44a40b87df86729
Author: Jonathan Wakely 
Date:   Tue May 7 23:33:31 2019 +0100

Improve API docs for std::pair

* include/bits/stl_pair.h: Improve docs.
* include/std/tuple: Likewise.

diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h
index 56565fd1ece..f99b774c21e 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -72,13 +72,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
 
 #if __cplusplus >= 201103L
-  /// piecewise_construct_t
+  /// Tag type for piecewise construction of std::pair objects.
   struct piecewise_construct_t { explicit piecewise_construct_t() = default; };
 
-  /// piecewise_construct
+  /// Tag for piecewise construction of std::pair objects.
   _GLIBCXX17_INLINE constexpr piecewise_construct_t piecewise_construct =
 piecewise_construct_t();
 
+  /// @cond undocumented
+
   // Forward declarations.
   template
 class tuple;
@@ -198,21 +200,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif // C++11
   };
 
+  /// @endcond
+
  /**
*  @brief Struct holding two objects of arbitrary type.
*
*  @tparam _T1  Type of first object.
*  @tparam _T2  Type of second object.
+   *
+   *  
*/
   template
 struct pair
 : private __pair_base<_T1, _T2>
 {
-  typedef _T1 first_type;/// @c first_type is the first bound type
-  typedef _T2 second_type;   /// @c second_type is the second bound type
+  typedef _T1 first_type;///< The type of the `first` member
+  typedef _T2 second_type;   ///< The type of the `second` member
 
-  _T1 first; /// @c first is a copy of the first object
-  _T2 second;/// @c second is a copy of the second object
+  _T1 first; ///< The first member
+  _T2 second;///< The second member
 
   // _GLIBCXX_RESOLVE_LIB_DEFECTS
   // 265.  std::pair::pair() effects overly restrictive
@@ -243,14 +249,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   : first(), second() { }
 #endif
 
-  /** Two objects may be passed to a @c pair constructor to be copied.  */
 #if __cplusplus < 201103L
+  /// Two objects may be passed to a @c pair constructor to be copied.
   pair(const _T1& __a, const _T2& __b)
   : first(__a), second(__b) { }
 #else
   // Shortcut for constraining the templates that don't take pairs.
+  /// @cond undocumented
   using _PCCP = _PCC;
+  /// @endcond
 
+  /// Construct from two const lvalues, allowing implicit conversions.
   template()
@@ -260,6 +269,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   constexpr pair(const _T1& __a, const _T2& __b)
   : first(__a), second(__b) { }
 
+  /// Construct from two const lvalues, disallowing implicit conversions.
template()
@@ -269,18 +279,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   explicit constexpr pair(const _T1& __a, const _T2& __b)
   : first(__a), second(__b) { }
 #endif
+  //@}
 
-  /** There is also a templated copy ctor for the @c pair class itself.  */
 #if __cplusplus < 201103L
+  /// There is also a templated constructor to convert from other pairs.
   template
 	pair(const pair<_U1, _U2>& __p)
 	: first(__p.first), second(__p.second) { }
 #else
   // Shortcut for constraining the templates that take pairs.
+  /// @cond undocumented
   template 
 using _PCCFP = _PCC::value
 			|| !is_same<_T2, _U2>::value,
 			_T1, _T2>;
+  /// @endcond
 
   template::template
@@ -299,9 +312,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  bool>::type=false>
 	explicit constexpr pair(const pair<_U1, _U2>& __p)
 	: first(__p.first), second(__p.second) { }
+#endif
 
-  constexpr pair(const pair&) = default;
-  constexpr pair(pair&&) = default;
+#if __cplusplus >= 201103L
+  constexpr pair(const pair&) = default;	///< Copy constructor
+  constexpr pair(pair&&) = default;		///< Move constructor
 
   // DR 811.
   template,
@@ -438,6 +454,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 };
 
+  /// @relates pair @{
+
 #if __cpp_deduction_guides >= 201606
   template pair(_T1, _T2) -> pair<_T1, _T2>;
 #endif
@@ -448,7 +466,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 operator==(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y)
 { return __x.first == __y.first && __x.second == __y.second; }
 
-  /// 
+  /** Defines a lexicographical order for pairs.
+   *
+   * For two pairs of the same type, `P` is ordered before `Q` if
+   * `P.first` is less than `Q.first`, or if `P.first` and `Q.first`
+   * are equivalent (neither is less than the other) and `P.second` is less
+   * than `Q.second`.
+  */
   template
 

[PATCH] Improve API docs for <chrono> and <ratio>

2019-05-07 Thread Jonathan Wakely

More Doxygenation.

Tested powerpc64le-linux. Committed to trunk.

commit 75601d987e84e686d433b5f96f838e42165ea97c
Author: Jonathan Wakely 
Date:   Tue May 7 22:33:01 2019 +0100

Improve API docs for <chrono> and <ratio>

* doc/doxygen/doxygroups.cc (std::literals): Add documentation for
inline namespace.
* include/std/chrono: Improve docs.
* include/std/ratio: Do not document implementation details.
* testsuite/20_util/ratio/cons/cons_overflow_neg.cc: Adjust dg-error
line numbers.
* testsuite/20_util/ratio/operations/ops_overflow_neg.cc: Likewise.

diff --git a/libstdc++-v3/doc/doxygen/doxygroups.cc b/libstdc++-v3/doc/doxygen/doxygroups.cc
index 630e02296ed..94ef3bb4f65 100644
--- a/libstdc++-v3/doc/doxygen/doxygroups.cc
+++ b/libstdc++-v3/doc/doxygen/doxygroups.cc
@@ -19,6 +19,9 @@
 /** @namespace std
  *  @brief ISO C++ entities toplevel namespace is std.
 */
+/** @namespace std
+ *  @brief ISO C++ inline namespace for literal suffixes.
+*/
 /** @namespace std::__detail
  *  @brief Implementation details not part of the namespace std interface.
 */
diff --git a/libstdc++-v3/include/std/chrono b/libstdc++-v3/include/std/chrono
index b7c1d750067..1f2c8799b5b 100644
--- a/libstdc++-v3/include/std/chrono
+++ b/libstdc++-v3/include/std/chrono
@@ -24,6 +24,7 @@
 
 /** @file include/chrono
  *  This is a Standard C++ Library header.
+ *  @ingroup chrono
  */
 
 #ifndef _GLIBCXX_CHRONO
@@ -67,6 +68,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // 20.11.4.3 specialization of common_type (for duration, sfinae-friendly)
 
+  /// @cond undocumented
+
   template
 struct __duration_common_type
 { };
@@ -90,6 +93,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __duration_common_type<__failure_type, _Period1, _Period2>
 { typedef __failure_type type; };
 
+  /// @endcond
+
+  /// Specialization of common_type for chrono::duration types.
+  /// @relates duration
   template
 struct common_type,
 		   chrono::duration<_Rep2, _Period2>>
@@ -98,6 +105,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // 20.11.4.3 specialization of common_type (for time_point, sfinae-friendly)
 
+  /// @cond undocumented
+
   template
 struct __timepoint_common_type
 { };
@@ -108,14 +117,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using type = chrono::time_point<_Clock, typename _CT::type>;
 };
 
+  /// @endcond
+
+  /// Specialization of common_type for chrono::time_point types.
+  /// @relates time_point
   template
 struct common_type,
 		   chrono::time_point<_Clock, _Duration2>>
 : __timepoint_common_type, _Clock>
 { };
 
+  // @} group chrono
+
   namespace chrono
   {
+/// @addtogroup chrono
+/// @{
+
+/// @cond undocumented
+
 // Primary template for duration_cast impl.
 template
@@ -188,6 +208,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using __disable_if_is_duration
 	= typename enable_if::value, _Tp>::type;
 
+/// @endcond
+
 /// duration_cast
 template
   constexpr __enable_if_is_duration<_ToDur>
@@ -288,6 +310,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{ return numeric_limits<_Rep>::lowest(); }
   };
 
+/// @cond undocumented
+
 template
   struct __is_ratio
   : std::false_type
@@ -298,6 +322,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   : std::true_type
   { };
 
+/// @endcond
+
 /// duration
 template
   struct duration
@@ -446,6 +472,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	rep __r;
   };
 
+/// @relates duration @{
+
+/// The sum of two durations.
 template
   constexpr typename common_type,
@@ -459,6 +488,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return __cd(__cd(__lhs).count() + __cd(__rhs).count());
   }
 
+/// The difference between two durations.
 template
   constexpr typename common_type,
@@ -472,6 +502,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return __cd(__cd(__lhs).count() - __cd(__rhs).count());
   }
 
+/// @}
+
+/// @cond undocumented
+
 // SFINAE helper to obtain common_type<_Rep1, _Rep2> only if _Rep2
 // is implicitly convertible to it.
 // _GLIBCXX_RESOLVE_LIB_DEFECTS
@@ -481,6 +515,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using __common_rep_t = typename
 	enable_if::value, _CRep>::type;
 
+/// @endcond
+
+/// @relates duration @{
+
+/// Multiply a duration by a scalar value.
 template
   constexpr duration<__common_rep_t<_Rep1, _Rep2>, _Period>
   operator*(const duration<_Rep1, _Period>& __d, const _Rep2& __s)
@@ -490,6 +529,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return __cd(__cd(__d).count() * __s);
   }
 
+/// Multiply a duration by a scalar value.
 template
   constexpr duration<__common_rep_t<_Rep2, _Rep1>, _Period>
   operator*(const _Rep1& __s, const duration<_Rep2, _Period>& __d)
@@ -542,6 +582,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 
 // comparisons
+
 template
   constexpr bool
@@ 

[PATCH] PR libstdc++/89102 implement new common_type rules (P0435R1, P0548R1)

2019-05-07 Thread Jonathan Wakely

This change ensures that std::common_type<> is a complete type (LWG
2408), and that std::common_type<T>, std::common_type<cv T1, cv T2>, and
std::common_type<T1, T2, R...> will use program-defined specializations
for std::common_type<T1, T2> (LWG 2465).

The implementation of common_type is changed to use
void_t, and the specializations for duration and time_point are modified
to also use void_t instead of depending on implementation details of
common_type.

PR libstdc++/89102
* doc/xml/manual/intro.xml: Document DR 2408 and 2465 changes.
* include/std/chrono (__duration_common_type_wrapper): Replace with ...
(__duration_common_type): New helper.
(common_type<chrono::duration<R1, P1>, chrono::duration<R2, P2>>): Use
__duration_common_type.
(__timepoint_common_type_wrapper): Replace with ...
(__timepoint_common_type): New helper.
(common_type<chrono::time_point<C, D1>, chrono::time_point<C, D2>>):
Use __timepoint_common_type.
* include/std/type_traits (common_type<>): Define, as per LWG 2408.
(__common_type_impl): If either argument is transformed by decay,
use the common_type of the decayed types.
(__common_type_impl<_Tp, _Up, _Tp, _Up>): If the types are already
decayed, use __do_common_type_impl to get the common_type.
(common_type<_Tp>): Use common_type<_Tp, _Tp>.
(__do_member_type_wrapper, __member_type_wrapper)
(__expanded_common_type_wrapper): Remove.
(__common_type_pack, __common_type_fold): New helpers.
(common_type<_Tp, _Up, _Vp...>): Use new helpers instead of
__member_type_wrapper and __expanded_common_type_wrapper.
* testsuite/20_util/common_type/requirements/explicit_instantiation.cc:
Test zero-length template argument list.
* testsuite/20_util/common_type/requirements/sfinae_friendly_1.cc:
Test single argument cases and argument types that should decay.
* testsuite/20_util/common_type/requirements/sfinae_friendly_2.cc:
Adjust expected error.
* testsuite/20_util/duration/literals/range_neg.cc: Use zero for
dg-error lineno.
* testsuite/20_util/duration/requirements/typedefs_neg1.cc: Likewise.
* testsuite/20_util/duration/requirements/typedefs_neg2.cc: Likewise.
* testsuite/20_util/duration/requirements/typedefs_neg3.cc: Likewise.

Tested powerpc64le-linux, committed to trunk.


commit 3f6b849095f82205598cd5b6ce223a7a2a26bd98
Author: Jonathan Wakely 
Date:   Wed Feb 6 11:51:37 2019 +

PR libstdc++/89102 implement new common_type rules (P0435R1, P0548R1)

This change ensures that std::common_type<> is a complete type (LWG
2408), and that std::common_type<T>, std::common_type<cv T1, cv T2>, and
std::common_type<T1, T2, R...> will use program-defined specializations
for std::common_type<T1, T2> (LWG 2465).

The implementation of common_type is changed to use
void_t, and the specializations for duration and time_point are modified
to also use void_t instead of depending on implementation details of
common_type.

PR libstdc++/89102
* doc/xml/manual/intro.xml: Document DR 2408 and 2465 changes.
* include/std/chrono (__duration_common_type_wrapper): Replace with 
...
(__duration_common_type): New helper.
(common_type, chrono::duration>): 
Use
__duration_common_type.
(__timepoint_common_type_wrapper): Replace with ...
(__timepoint_common_type): New helper.
(common_type, chrono::time_point>):
Use __timepoint_common_type.
* include/std/type_traits (common_type<>): Define, as per LWG 2408.
(__common_type_impl): If either argument is transformed by decay,
use the common_type of the decayed types.
(__common_type_impl<_Tp, _Up, _Tp, _Up>): If the types are already
decayed, use __do_common_type_impl to get the common_type.
(common_type<_Tp>): Use common_type<_Tp, _Tp>.
(__do_member_type_wrapper, __member_type_wrapper)
(__expanded_common_type_wrapper): Remove.
(__common_type_pack, __common_type_fold): New helpers.
(common_type<_Tp, _Up, _Vp...>): Use new helpers instead of
__member_type_wrapper and __expanded_common_type_wrapper.
* 
testsuite/20_util/common_type/requirements/explicit_instantiation.cc:
Test zero-length template argument list.
* testsuite/20_util/common_type/requirements/sfinae_friendly_1.cc:
Test single argument cases and argument types that should decay.
* testsuite/20_util/common_type/requirements/sfinae_friendly_2.cc:
Adjust expected error.
* testsuite/20_util/duration/literals/range_neg.cc: Use zero for
dg-error lineno.
* testsuite/20_util/duration/requirements/typedefs_neg1.cc: 
Likewise.
* testsuite/20_util/duration/requirements/typedefs_neg2.cc: 
Likewise.
   

[C++ PATCH] PR c++/90171 - reorganize usual_deallocation_fn_p

2019-05-07 Thread Jason Merrill
When fixing 90171 it struck me as undesirable to have so many separate
functions that all needed to know about the definition of a usual
deallocation function.  So this patch condenses them into one.  I left
destroying_delete_p because it is used by other files as well.

Tested x86_64-pc-linux-gnu, applying to trunk.

* call.c (struct dealloc_info): New.
(usual_deallocation_fn_p): Take a dealloc_info*.
(aligned_deallocation_fn_p, sized_deallocation_fn_p): Remove.
(build_op_delete_call): Adjust.
---
 gcc/cp/call.c| 144 ---
 gcc/cp/ChangeLog |   8 +++
 2 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 1a79017eff4..20db2974b56 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -6348,63 +6348,22 @@ destroying_delete_p (tree t)
   return std_destroying_delete_t_p (type) ? type : NULL_TREE;
 }
 
-/* Returns true iff T, an element of an OVERLOAD chain, is a usual deallocation
-   function (3.7.4.2 [basic.stc.dynamic.deallocation]) with a parameter of
-   std::align_val_t.  */
-
-static bool
-aligned_deallocation_fn_p (tree t)
+struct dealloc_info
 {
-  if (!aligned_new_threshold)
-return false;
-
-  /* A template instance is never a usual deallocation function,
- regardless of its signature.  */
-  if (TREE_CODE (t) == TEMPLATE_DECL
-  || primary_template_specialization_p (t))
-return false;
-
-  tree a = FUNCTION_ARG_CHAIN (t);
-  if (destroying_delete_p (t))
-a = TREE_CHAIN (a);
-  if (same_type_p (TREE_VALUE (a), align_type_node)
-  && TREE_CHAIN (a) == void_list_node)
-return true;
-  if (!same_type_p (TREE_VALUE (a), size_type_node))
-return false;
-  a = TREE_CHAIN (a);
-  if (a && same_type_p (TREE_VALUE (a), align_type_node)
-  && TREE_CHAIN (a) == void_list_node)
-return true;
-  return false;
-}
+  bool sized;
+  bool aligned;
+  tree destroying;
+};
 
-/* Returns true if FN is a usual deallocation fn with a size_t parameter.  */
+/* Returns true iff T, an element of an OVERLOAD chain, is a usual deallocation
+   function (3.7.4.2 [basic.stc.dynamic.deallocation]).  If so, and DI is
+   non-null, also set *DI. */
 
 static bool
-sized_deallocation_fn_p (tree fn)
+usual_deallocation_fn_p (tree t, dealloc_info *di)
 {
-  tree t = FUNCTION_ARG_CHAIN (fn);
-  if (destroying_delete_p (fn))
-t = TREE_CHAIN (t);
-  if (!t || !same_type_p (TREE_VALUE (t), size_type_node))
-return false;
-  t = TREE_CHAIN (t);
-  if (t == void_list_node)
-return true;
-  if (aligned_new_threshold && t
-  && same_type_p (TREE_VALUE (t), align_type_node)
-  && TREE_CHAIN (t) == void_list_node)
-return true;
-  return false;
-}
+  if (di) *di = dealloc_info();
 
-/* Returns true iff T, an element of an OVERLOAD chain, is a usual
-   deallocation function (3.7.4.2 [basic.stc.dynamic.deallocation]).  */
-
-bool
-usual_deallocation_fn_p (tree t)
-{
   /* A template instance is never a usual deallocation function,
  regardless of its signature.  */
   if (TREE_CODE (t) == TEMPLATE_DECL
@@ -6418,17 +6377,33 @@ usual_deallocation_fn_p (tree t)
  - optionally, a parameter of type std::align_val_t.  */
   bool global = DECL_NAMESPACE_SCOPE_P (t);
   tree chain = FUNCTION_ARG_CHAIN (t);
-  if (!chain)
-return false;
-  if (destroying_delete_p (t))
-chain = TREE_CHAIN (chain);
-  if (chain == void_list_node
-  || ((!global || flag_sized_deallocation)
- && sized_deallocation_fn_p (t)))
-return true;
-  if (aligned_deallocation_fn_p (t))
-return true;
-  return false;
+  if (chain && destroying_delete_p (t))
+{
+  if (di) di->destroying = TREE_VALUE (chain);
+  chain = TREE_CHAIN (chain);
+}
+  if (chain
+  && (!global || flag_sized_deallocation)
+  && same_type_p (TREE_VALUE (chain), size_type_node))
+{
+  if (di) di->sized = true;
+  chain = TREE_CHAIN (chain);
+}
+  if (chain && aligned_new_threshold
+  && same_type_p (TREE_VALUE (chain), align_type_node))
+{
+  if (di) di->aligned = true;
+  chain = TREE_CHAIN (chain);
+}
+  return (chain == void_list_node);
+}
+
+/* Just return whether FN is a usual deallocation function.  */
+
+bool
+usual_deallocation_fn_p (tree fn)
+{
+  return usual_deallocation_fn_p (fn, NULL);
 }
 
 /* Build a call to operator delete.  This has to be handled very specially,
@@ -6457,6 +6432,7 @@ build_op_delete_call (enum tree_code code, tree addr, 
tree size,
 {
   tree fn = NULL_TREE;
   tree fns, fnname, type, t;
+  dealloc_info di_fn = { };
 
   if (addr == error_mark_node)
 return error_mark_node;
@@ -6575,11 +6551,13 @@ build_op_delete_call (enum tree_code code, tree addr, 
tree size,
 for (lkp_iterator iter (MAYBE_BASELINK_FUNCTIONS (fns)); iter; ++iter)
   {
tree elt = *iter;
-   if (usual_deallocation_fn_p (elt))
+   dealloc_info di_elt;
+   if (usual_deallocation_fn_p (elt, _elt))
 

[PATCH] rs6000: Remove reload leftovers

2019-05-07 Thread Segher Boessenkool
There are a few things left in the rs6000 port that are unused now
that we do not support old reload anymore.  This removes those.

Tested on powerpc64-linux {-m32,-m64}; committing.


Segher


2019-05-07  Segher Boessenkool  

* config/rs6000/rs6000-protos.h (rs6000_legitimize_reload_address_ptr):
Delete declaration.
* config/rs6000/rs6000.c (rs6000_legitimize_reload_address): Delete.
(rs6000_debug_legitimize_reload_address): Delete.
(rs6000_legitimize_reload_address_ptr): Delete.
(rs6000_option_override_internal): Adjust.
(mem_operand_gpr): Adjust comment.
(legitimate_lo_sum_address_p): Ditto.
(rs6000_legitimize_reload_address): Delete.
(rs6000_debug_legitimize_reload_address): Delete.
* config/rs6000/rs6000.h (LEGITIMIZE_RELOAD_ADDRESS): Delete.

---
 gcc/config/rs6000/rs6000-protos.h |   2 -
 gcc/config/rs6000/rs6000.c| 305 +-
 gcc/config/rs6000/rs6000.h|  17 ---
 3 files changed, 3 insertions(+), 321 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 9af6198..8466368 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -137,8 +137,6 @@ extern void rs6000_emit_le_vsx_permute (rtx, rtx, 
machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
-extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode,
-   int, int, int, int *);
 extern bool rs6000_legitimate_offset_address_p (machine_mode, rtx,
bool, bool);
 extern void rs6000_output_tlsargs (rtx *);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1114cab..5d5765d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1368,10 +1368,6 @@ static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS 
*, const_tree,
 static void macho_branch_islands (void);
 static tree get_prev_label (tree);
 #endif
-static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
-int, int *);
-static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
-  int, int, int *);
 static bool rs6000_mode_dependent_address (const_rtx);
 static bool rs6000_debug_mode_dependent_address (const_rtx);
 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
@@ -1392,10 +1388,6 @@ static bool rs6000_debug_can_change_mode_class 
(machine_mode,
 static bool rs6000_save_toc_in_prologue_p (void);
 static rtx rs6000_internal_arg_pointer (void);
 
-rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
-int, int *)
-  = rs6000_legitimize_reload_address;
-
 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
   = rs6000_mode_dependent_address;
 
@@ -4422,8 +4414,6 @@ rs6000_option_override_internal (bool global_init_p)
= rs6000_debug_can_change_mode_class;
  rs6000_preferred_reload_class_ptr
= rs6000_debug_preferred_reload_class;
- rs6000_legitimize_reload_address_ptr
-   = rs6000_debug_legitimize_reload_address;
  rs6000_mode_dependent_address_ptr
= rs6000_debug_mode_dependent_address;
}
@@ -7663,14 +7653,10 @@ address_offset (rtx op)
 
Accept direct, indexed, offset, lo_sum and tocref.  Since this is
a constraint function we know the operand has satisfied a suitable
-   memory predicate.  Also accept some odd rtl generated by reload
-   (see rs6000_legitimize_reload_address for various forms).  It is
-   important that reload rtl be accepted by appropriate constraints
-   but not by the operand predicate.
+   memory predicate.
 
Offsetting a lo_sum should not be allowed, except where we know by
-   alignment that a 32k boundary is not crossed, but see the ???
-   comment in rs6000_legitimize_reload_address.  Note that by
+   alignment that a 32k boundary is not crossed.  Note that by
"offsetting" here we mean a further offset to access parts of the
MEM.  It's fine to have a lo_sum where the inner address is offset
from a sym, since the same sym+offset will appear in the high part
@@ -8133,8 +8119,7 @@ legitimate_lo_sum_address_p (machine_mode mode, rtx x, 
int strict)
 function says opposite.  In most cases, LRA through different
 transformations can generate correct code for address reloads.
 It cannot manage only some LO_SUM cases.  So we need to add
-code analogous to one in rs6000_legitimize_reload_address for
-LOW_SUM here saying that some addresses are still valid.  */
+code here saying that some addresses are still valid.  */
   

Re: abstract out EH propagation cleanups

2019-05-07 Thread Jeff Law
On 5/7/19 3:45 AM, Richard Biener wrote:
> On Tue, May 7, 2019 at 11:13 AM Aldy Hernandez  wrote:
>>
>> Hi.
>>
>> We seem to have numerous copies of the same EH propagation cleanups
>> scattered throughout the compiler.  The attached patch moves all the
>> logic into one class that allows for easy marking of statements and
>> automatic cleanup once it goes out of scope.
>>
>> Tested on x86-64 Linux.
>>
>> OK for trunk? (*)
> 
> Ugh :/
> 
> First of all I don't like the fact that the actual cleanup is done
> upon constructor execution.  Please make it explicit
> and in the constructor assert that nothing is to be done.
I'm of a mixed mind here.  I have railed against implicit code being run
behind my back for decades.

However as I've had to debug locking issues and the like in other C++
codebases I've become more and more of a fan of RAII and its basic
concepts.  This has made me more open to code running behind my back
like this implicitly when the object gets destroyed.

There's something to be said for embedding this little class into other
objects like Aldy has done and just letting things clean up
automatically as the object goes out of scope.  No more missing calls to
run the cleanup bits, it "just works".

But I won't object if you want it to be more explicit.  I've been there
and understand why one might want the cleanup step to be explicit.



> 
> Then I'm not sure this is a 1:1 transform since for example
> 
> @@ -1061,8 +1173,6 @@
> substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
> }
> 
>gimple *old_stmt = stmt;
> -  bool was_noreturn = (is_gimple_call (stmt)
> -  && gimple_call_noreturn_p (stmt));
> 
>/* Replace real uses in the statement.  */
>did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);
> @@ -1110,25 +1220,7 @@
> substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
>/* Now cleanup.  */
>if (did_replace)
> {
> ...
> + fixups.record_change (old_stmt, stmt);
> 
> here we no longer can reliably determine whether old_stmt was noreturn since
> we substitute into stmt itself.  It's no longer a correctness issue if
> we do _not_
> fixup_noreturn since we now have GF_CALL_CTRL_ALTERING, it's merely
> an optimization issue.  So there may be no testcase for this (previously such
> cases ICEd).
But AFAICT we don't care in the case Aldy is changing.  If we really
want to know if the old statement was a noreturn we can test prior to
queueing it.


> 
> I'm also not sure I like to put all these (unrelated) things into a
> single class,
> it really also hides the details of what is performed immediately and what
> delayed and what kind of changes - this makes understanding of pass
> transforms hard.
On the other hand this class defines a contract for what it can and will
do for us and allows us to bring consistency in that handling.  We
declare manual management of this stuff verboten.  Ideally we'd declare
the class final and avoid derivation, but I doubt we can do that
immediately.

Jeff


[PATCH, i386]: Macroize ashrsi3_cvt and ashrdi3_cvt patterns

2019-05-07 Thread Uros Bizjak
2019-05-07  Uroš Bizjak  

* config/i386/i386.md (cvt_mnemonic): New mode attribute.
(ashr<mode>3_cvt): Merge insn pattern from ashrsi3_cvt and
ashrdi3_cvt using SWI48 mode iterator.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 270954)
+++ config/i386/i386.md (working copy)
@@ -10872,22 +10872,27 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
 
-(define_insn "ashrdi3_cvt"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
-   (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
-(match_operand:QI 2 "const_int_operand")))
+;; Base name for insn mnemonic.
+(define_mode_attr cvt_mnemonic
+  [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
+
+(define_insn "ashr3_cvt"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
+   (ashiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
+ (match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && INTVAL (operands[2]) == 63
+  "INTVAL (operands[2]) == GET_MODE_BITSIZE (mode)-1
&& (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, mode, operands)"
   "@
-   {cqto|cqo}
-   sar{q}\t{%2, %0|%0, %2}"
+   
+   sar{}\t{%2, %0|%0, %2}"
   [(set_attr "type" "imovx,ishift")
(set_attr "prefix_0f" "0,*")
(set_attr "length_immediate" "0,*")
(set_attr "modrm" "0,1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "")])
 
 (define_insn "*ashrsi3_cvt_zext"
   [(set (match_operand:DI 0 "register_operand" "=*d,r")
@@ -10907,23 +10912,6 @@
(set_attr "modrm" "0,1")
(set_attr "mode" "SI")])
 
-(define_insn "ashrsi3_cvt"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
-   (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
-(match_operand:QI 2 "const_int_operand")))
-   (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[2]) == 31
-   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
-  "@
-   {cltd|cdq}
-   sar{l}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "imovx,ishift")
-   (set_attr "prefix_0f" "0,*")
-   (set_attr "length_immediate" "0,*")
-   (set_attr "modrm" "0,1")
-   (set_attr "mode" "SI")])
-
 (define_expand "x86_shift_adj_3"
   [(use (match_operand:SWI48 0 "register_operand"))
(use (match_operand:SWI48 1 "register_operand"))


[C++ PATCH] A non-type template parm with a placeholder type is type-dependent.

2019-05-07 Thread Jason Merrill
I don't remember which PR I was looking at when I stashed this change away for
GCC 10; it ended up not being necessary, but definitely seems correct.

Tested x86_64-pc-linux-gnu, applying to trunk.

* pt.c (type_dependent_expression_p): A non-type template parm with
a placeholder type is type-dependent.
---
 gcc/cp/pt.c  | 7 +++
 gcc/cp/ChangeLog | 5 +
 2 files changed, 12 insertions(+)

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 2f2066e297c..92b67106d63 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -25981,6 +25981,13 @@ type_dependent_expression_p (tree expression)
   return false;
 }
 
+  /* The type of a non-type template parm declared with a placeholder type
+ depends on the corresponding template argument, even though
+ placeholders are not normally considered dependent.  */
+  if (TREE_CODE (expression) == TEMPLATE_PARM_INDEX
+  && is_auto (TREE_TYPE (expression)))
+return true;
+
   gcc_assert (TREE_CODE (expression) != TYPE_DECL);
 
   /* Dependent type attributes might not have made it from the decl to
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index e974951dc52..bd0914b8ffa 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,8 @@
+2019-05-07  Jason Merrill  
+
+   * pt.c (type_dependent_expression_p): A non-type template parm with
+   a placeholder type is type-dependent.
+
 2019-05-06  Marek Polacek  
 
PR c++/90265 - ICE with generic lambda.

base-commit: 1f51079362f27895c9c4e125549f6cc1b4d50568
-- 
2.20.1



[C++ PATCH] PR c++/86485 - -Wmaybe-unused with empty class ?:

2019-05-07 Thread Jason Merrill
* typeck.c (build_static_cast_1): Use cp_build_addr_expr.

For GCC 9 I fixed this bug with a patch to gimplify_cond_expr, but this
function was also doing the wrong thing.

Using build_address does not push the ADDR_EXPR down into the arms of a
COND_EXPR, which we need for proper handling of conversion of an lvalue ?:
to another reference type.
---
 gcc/cp/typeck.c  | 5 +++--
 gcc/cp/ChangeLog | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index c107a321949..f039a3b3eb0 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -5916,7 +5916,8 @@ condition_conversion (tree expr)
 }
 
 /* Returns the address of T.  This function will fold away
-   ADDR_EXPR of INDIRECT_REF.  */
+   ADDR_EXPR of INDIRECT_REF.  This is only for low-level usage;
+   most places should use cp_build_addr_expr instead.  */
 
 tree
 build_address (tree t)
@@ -7114,7 +7115,7 @@ build_static_cast_1 (tree type, tree expr, bool c_cast_p,
   base = lookup_base (TREE_TYPE (type), intype,
  c_cast_p ? ba_unique : ba_check,
  NULL, complain);
-  expr = build_address (expr);
+  expr = cp_build_addr_expr (expr, complain);
 
   if (sanitize_flags_p (SANITIZE_VPTR))
{
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index bd0914b8ffa..d90cc099767 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,5 +1,8 @@
 2019-05-07  Jason Merrill  
 
+   PR c++/86485 - -Wmaybe-unused with empty class ?:
+   * typeck.c (build_static_cast_1): Use cp_build_addr_expr.
+
* pt.c (type_dependent_expression_p): A non-type template parm with
a placeholder type is type-dependent.
 

base-commit: 1f51079362f27895c9c4e125549f6cc1b4d50568
prerequisite-patch-id: f9e234ae0c68beb8fed9e212e3da578a940c2995
-- 
2.20.1



Re: [Patch, fortran] ISO_Fortran_binding PRs 90093, 90352 & 90355

2019-05-07 Thread Paul Richard Thomas
Hi Dominique,

Many thanks - I had already found this after replenishing my tree and
regtesting. I don't quite know how it escaped but the fix is obvious.

Amicalement

Paul

On Tue, 7 May 2019 at 09:39, Dominique d'Humières  wrote:
>
> Hi Paul,
>
> With your patch, I see
>
> FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O   (test for errors, line 8)
> FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O   (test for errors, line 9)
> FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O  (test for excess errors)
>
> This is due to a bad location of the errors:
>
> /opt/gcc/work/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90:7:16:
>
> 7 | subroutine bar(c,d) BIND(C)
>   |1
> Error: Character argument 'c' at (1) must be length 1 because procedure 'bar' 
> is BIND(C)
> /opt/gcc/work/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90:7:18:
>
> 7 | subroutine bar(c,d) BIND(C)
>   |  1
> Error: Character argument 'd' at (1) must be length 1 because procedure 'bar' 
> is BIND(C)
>
> TIA
>
> Dominique



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: Enable BF16 support (Please ignore my former email)

2019-05-07 Thread Uros Bizjak
On Tue, May 7, 2019 at 8:49 AM Hongtao Liu  wrote:

> > > > > > > > > This patch is about to enable support for bfloat16 which 
> > > > > > > > > will be in Future Cooper Lake, Please refer to 
> > > > > > > > > https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
> > > > > > > > > for more details about BF16.
> > > > > > > > >
> > > > > > > > > There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, 
> > > > > > > > > VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector 
> > > > > > > > > Neural Network Instructions supporting:
> > > > > > > > >
> > > > > > > > > -   VCVTNE2PS2BF16: Convert Two Packed Single Data to One 
> > > > > > > > > Packed BF16 Data.
> > > > > > > > > -   VCVTNEPS2BF16: Convert Packed Single Data to Packed 
> > > > > > > > > BF16 Data.
> > > > > > > > > -   VDPBF16PS: Dot Product of BF16 Pairs Accumulated into 
> > > > > > > > > Packed Single Precision.
> > > > > > > > >
> > > > > > > > > Since only BF16 intrinsics are supported, we treat it as HI 
> > > > > > > > > for simplicity.
> > > > > > > >
> > > > > > > > I think it was a mistake declaring cvtps2ph and cvtph2ps using 
> > > > > > > > HImode
> > > > > > > > instead of HFmode. Is there a compelling reason not to introduce
> > > > > > > > corresponding bf16_format supporting infrastructure and declare 
> > > > > > > > these
> > > > > > > > intrinsics using half-binary (HBmode ?) mode instead?
> > > > > > > >
> > > > > > > > Uros.
> > > > > > >
> > > > > > > Bfloat16 isn't IEEE standard which we want to reserve HFmode for.
> > > > > >
> > > > > > True.
> > > > > >
> > > > > > > The IEEE 754 standard specifies a binary16 as having the 
> > > > > > > following format:
> > > > > > > Sign bit: 1 bit
> > > > > > > Exponent width: 5 bits
> > > > > > > Significand precision: 11 bits (10 explicitly stored)
> > > > > > >
> > > > > > > Bfloat16 has the following format:
> > > > > > > Sign bit: 1 bit
> > > > > > > Exponent width: 8 bits
> > > > > > > Significand precision: 8 bits (7 explicitly stored), as opposed 
> > > > > > > to 24
> > > > > > > bits in a classical single-precision floating-point format
> > > > > >
> > > > > > This is why I proposed to introduce HBmode (and corresponding
> > > > > > > bfloat16_format) to distinguish between ieee HFmode and BFmode.
> > > > > >
> > > > >
> > > > > Unless there is BF16 language level support,  HBmode has no advantage
> > > > > over HImode.   We can add HBmode when we gain BF16 language support.
> > > > >
> > > > > --
> > > > > H.J.
> > > >
> > > > Any other comments, I'll merge this to trunk?
> > >
> > > It is not a regression, so please no.
> >
> > Ehm, "regression fix" ...
> >
> > Uros.
>
> Update patch.

Index: gcc/config/i386/i386-builtins.c
===
--- gcc/config/i386/i386-builtins.c(revision 270934)
+++ gcc/config/i386/i386-builtins.c(working copy)
@@ -1920,6 +1920,7 @@
   F_VPCLMULQDQ,
   F_AVX512VNNI,
   F_AVX512BITALG,
+  F_AVX512BF16,
   F_MAX
 };

@@ -2064,7 +2065,8 @@
   {"gfni",F_GFNI,P_ZERO},
   {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
   {"avx512vnni", F_AVX512VNNI, P_ZERO},
-  {"avx512bitalg", F_AVX512BITALG, P_ZERO}
+  {"avx512bitalg", F_AVX512BITALG, P_ZERO},
+  {"avx512bf16", F_AVX512BF16, P_ZERO}
 };

 /* This parses the attribute arguments to target in DECL and determines

You also need to update cpuinfo.h and cpuinfo.c in libgcc/config/i386
with avx512bf16, plus relevant test files.

Index: gcc/testsuite/gcc.target/i386/avx-1.c
Index: gcc/testsuite/gcc.target/i386/avx-2.c

No need to update above two files, sse-*.c changes are enough to cover
new functionality.

Otherwise LGTM, but please repost updated patch with the ChangeLog
entry (please see [1]).

[1] https://www.gnu.org/software/gcc/contribute.html#patches

Uros.


Re: [libcpp] Reimplement mkdeps data structures

2019-05-07 Thread Nathan Sidwell

On 5/7/19 10:31 AM, Christophe Lyon wrote:


After your commit, I'm seeing an ICE while building glibc headers:
: internal compiler error: Segmentation fault
0xc2eeaf crash_signal
 
/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/toplev.c:326
0x151ad0d munge
 
/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:176



while trying to build glibc-headers/bits/stdio_lim.st

(seen on aarch64)


When compiling from stdin, the preprocessor registers an empty file 
dependency (as the first dependency), which previously silently output 
nothing.  That now causes a null pointer dereference.  We shouldn't be 
registering such blank names.


Fixed thusly.

I see glibc uses -MT file1\ file1 to register multiple targets, that's 
somewhat implementation & make specific, but not our problem ...


nathan

--
Nathan Sidwell
2019-05-07  Nathan Sidwell  

	* files.c (_cpp_stack_file): Empty filenames aren't dependencies.
	* mkdeps.c (deps_add_dep): Assert not empty.

Index: libcpp/files.c
===
--- libcpp/files.c	(revision 270940)
+++ libcpp/files.c	(working copy)
@@ -906,11 +906,11 @@ _cpp_stack_file (cpp_reader *pfile, _cpp
 sysp = MAX (pfile->buffer->sysp,  file->dir->sysp);
 
   /* Add the file to the dependencies on its first inclusion.  */
-  if (CPP_OPTION (pfile, deps.style) > !!sysp && !file->stack_count)
-{
-  if (!file->main_file || !CPP_OPTION (pfile, deps.ignore_main_file))
-	deps_add_dep (pfile->deps, file->path);
-}
+  if (!file->stack_count
+  && CPP_OPTION (pfile, deps.style) > !!sysp
+  && file->path[0]
+  && (!file->main_file || !CPP_OPTION (pfile, deps.ignore_main_file)))
+deps_add_dep (pfile->deps, file->path);
 
   /* Clear buffer_valid since _cpp_clean_line messes it up.  */
   file->buffer_valid = false;
Index: libcpp/mkdeps.c
===
--- libcpp/mkdeps.c	(revision 270943)
+++ libcpp/mkdeps.c	(working copy)
@@ -281,6 +281,8 @@ deps_add_default_target (struct mkdeps *
 void
 deps_add_dep (struct mkdeps *d, const char *t)
 {
+  gcc_assert (*t);
+
   t = apply_vpath (d, t);
 
   d->deps.push (xstrdup (t));


resigning as IA-64 maintainer

2019-05-07 Thread Jim Wilson
I haven't had IA-64 hardware access for a few years, but I've been
trying to help out as much as I can without hardware.  Now though, my
RISC-V work is keeping me very busy, and my IA-64 work has dwindled
down to nothing, so it is time for me to resign as an IA-64 maintainer.
Also, Intel has announced end-of-life for IA-64 in Fall 2020, so if
someone else doesn't step up to maintain the port we might want to
think about deprecating it.

I committed the following change to remove myself as IA-64 maintainer.

Jim

2019-05-07  Jim Wilson  

* MAINTAINERS: Remove myself as IA-64 maintainer.

Index: MAINTAINERS
===
--- MAINTAINERS (revision 270763)
+++ MAINTAINERS (working copy)
@@ -72,7 +72,6 @@
 i386 port  Jan Hubicka 
 i386 port  Uros Bizjak 
 i386 vector ISA extns  Kirill Yukhin   
-ia64 port  Jim Wilson  
 iq2000 portNick Clifton
 lm32 port  Sebastien Bourdeauducq  
 m32r port  Nick Clifton


New Ukrainian PO file for 'gcc' (version 9.1.0)

2019-05-07 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the Ukrainian team of translators.  The file is available at:

https://translationproject.org/latest/gcc/uk.po

(This file, 'gcc-9.1.0.uk.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




New template for 'gcc' made available

2019-05-07 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.  (If you have
any questions, send them to .)

A new POT file for textual domain 'gcc' has been made available
to the language teams for translation.  It is archived as:

https://translationproject.org/POT-files/gcc-9.1.0.pot

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

Below is the URL which has been provided to the translators of your
package.  Please inform the translation coordinator, at the address
at the bottom, if this information is not current:

https://ftp.gnu.org/gnu/gcc/gcc-9.1.0/gcc-9.1.0.tar.xz

Translated PO files will later be automatically e-mailed to you.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [PATCH] Eliminates phi on branch for CMP result

2019-05-07 Thread Andrew Pinski
On Mon, May 6, 2019 at 7:24 AM Jiufu Guo  wrote:
>
> Hi,
>
> This patch implements the optimization in PR77820.  The optimization
> eliminates phi and phi's basic block, if the phi is used only by
> condition branch, and the phi's incoming value is the result of a
> CMP.
>
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting the CMP result, if there is a phi = (INT_CONV) p0.
>
>   <P0>
>   p0 = a CMP b
>   goto <X>;
>
>   <P1>
>   p1 = c CMP d
>   goto <X>;
>
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
>
> Transform to:
>
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
>
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
>
> Bootstrapped and tested on powerpc64le with no regressions, and testcases were
> saved. Is this ok for trunk?

forwprop was originally created to do something similar, but this case is a
specific case of backwards prop (almost).
I wonder if it could be combined with that or as Richard mentioned,
jump threading.

Thanks,
Andrew Pinski

>
> Thanks!
>
> [gcc]
> 2019-05-06  Jiufu Guo  
> Lijia He  
>
> PR tree-optimization/77820
> * tree-ssa-mergephicmp.c: New file.
> * Makefile.in (OBJS): Add tree-ssa-mergephicmp.o.
> * common.opt (ftree-mergephicmp): New flag.
> * passes.def (pass_mergephicmp): New pass.
> * timevar.def (TV_TREE_MERGEPHICMP): New timevar.
> * tree-pass.h: New file.
>
> [gcc/testsuite]
> 2019-05-06  Jiufu Guo  
> Lijia He  
>
> PR tree-optimization/77820
> * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>
>
> ---
>  gcc/Makefile.in  |   1 +
>  gcc/common.opt   |   4 +
>  gcc/passes.def   |   1 +
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c |  31 +++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c |  31 +++
>  gcc/timevar.def  |   1 +
>  gcc/tree-pass.h  |   1 +
>  gcc/tree-ssa-mergephicmp.c   | 260 
> +++
>  8 files changed, 330 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/tree-ssa-mergephicmp.c
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index d186d71..9729501 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1567,6 +1567,7 @@ OBJS = \
> tree-ssa-reassoc.o \
> tree-ssa-sccvn.o \
> tree-ssa-scopedtables.o \
> +   tree-ssa-mergephicmp.o \
> tree-ssa-sink.o \
> tree-ssa-strlen.o \
> tree-ssa-structalias.o \
> diff --git a/gcc/common.opt b/gcc/common.opt
> index d342c4f..5ea5ed2 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2702,6 +2702,10 @@ ftree-salias
>  Common Ignore
>  Does nothing.  Preserved for backward compatibility.
>
> +ftree-mergephicmp
> +Common Report Var(flag_mergephicmp) Init(1) Optimization
> +Enable optimization on branch phi compare on trees.
> +
>  ftree-sink
>  Common Report Var(flag_tree_sink) Optimization
>  Enable SSA code sinking on trees.
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 446a7c4..e3a3913 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -249,6 +249,7 @@ along with GCC; see the file COPYING3.  If not see
>NEXT_PASS (pass_lim);
>NEXT_PASS (pass_walloca, false);
>NEXT_PASS (pass_pre);
> +  NEXT_PASS (pass_mergephicmp);
>NEXT_PASS (pass_sink_code);
>NEXT_PASS (pass_sancov);
>NEXT_PASS (pass_asan);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 000..2e3f4f6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-mergephicmp" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +{
> +  t = a < b;
> +}
> +  else if (d == a + b)
> +{
> +  t = c < d;
> +}
> +  else
> +{
> +  t = a == c;
> +}
> +
> +  if (t)
> +{
> +  g1 ();
> +  g ();
> +}
> +}
> +
> +/* { dg-final { scan-tree-dump-not "PHI" "mergephicmp" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> new file mode 100644
> index 000..7c35417
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-mergephicmp" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;

Re: [PATCH] Eliminates phi on branch for CMP result

2019-05-07 Thread Segher Boessenkool
Let me try to answer some of this...

On Tue, May 07, 2019 at 03:31:27PM +0200, Richard Biener wrote:
> On Mon, 6 May 2019, Jiufu Guo wrote:
> > This patch implements the optimization in PR77820.  The optimization
> > eliminates phi and phi's basic block, if the phi is used only by
> > condition branch, and the phi's incoming value is the result of a
> > CMP.

> I'm not sure I like a new pass here ;)  The transform is basically
> tail-duplicating the PHI block because the exit conditional can
> be "simplified" - that's something jump threading generally does
> though it relies on "simplified" being a little bit more simplified
> than above.

Right, but where in the pipeline does this fit in?

> I suspect this transform was implemented because of some benchmark?

Something in SPEC...  2006 iirc...  Will need to dig it up, I forgot
the details.

> I suspect the performance benefit is because of better branch
> prediction by not mangling both conditional branches into one?

No, it is that previously a condition was moved to a GPR, and then compared
again.  See PR77820.  This is expensive, and serial, too.

> The transform is also somewhat similar to tail-duplication done
> in path splitting or tracer.

Yes.

> The pass itself does quite strict pattern-matching but I wonder
> if more cases should be handled this way.

Maybe.  Probably.  But which?

> Any specific reason for the pass placement between PRE and sinking?
> tracer and path splitting run much later, jump threading runs
> all over the place.

Dunno.  Jiufu, does the pass placement matter much?


Segher


RE: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-05-07 Thread Alejandro Martinez Vicente
Great, committed in rev. 270975

Alejandro

> -Original Message-
> From: Richard Sandiford 
> Sent: 07 May 2019 17:18
> To: Alejandro Martinez Vicente 
> Cc: James Greenhalgh ; GCC Patches  patc...@gcc.gnu.org>; nd ; Richard Biener
> 
> Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> 
> Alejandro Martinez Vicente  writes:
> > Thanks for your comments Richard. I think this patch addresses them.
> 
> Yeah, this is OK to install, thanks.
> 
> Richard
> 
> >
> > Alejandro
> >
> >> -Original Message-
> >> From: Richard Sandiford 
> >> Sent: 07 May 2019 15:46
> >> To: Alejandro Martinez Vicente 
> >> Cc: James Greenhalgh ; GCC Patches  >> patc...@gcc.gnu.org>; nd ; Richard Biener
> >> 
> >> Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> >>
> >> Alejandro Martinez Vicente 
> writes:
> >> > +;; Helper expander for aarch64_abd_3 to save the callers
> >> > +;; the hassle of constructing the other arm of the MINUS.
> >> > +(define_expand "abd_3"
> >> > +  [(use (match_operand:SVE_I 0 "register_operand"))
> >> > +   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
> >> > +(match_operand:SVE_I 2 "register_operand"))]
> >> > +  "TARGET_SVE"
> >> > +  {
> >> > +rtx other_arm
> >> > +  = simplify_gen_binary (, mode, operands[1],
> >> > +operands[2]);
> >>
> >> I realise this is just copied from the Advanced SIMD version, but
> >> simplify_gen_binary is a bit dangerous here, since we explicitly want
> >> an unsimplified  with the two operands given.  Probably
> better as:
> >>
> >>   gen_rtx_ (mode, ...)
> >>
> >> > +emit_insn (gen_aarch64_abd_3 (operands[0],
> operands[1],
> >> > +   operands[2], other_arm));
> >> > +DONE;
> >> > +  }
> >> > +)
> >> > +
> >> > +;; Unpredicated integer absolute difference.
> >> > +(define_expand "aarch64_abd_3"
> >> > +  [(set (match_operand:SVE_I 0 "register_operand")
> >> > +(unspec:SVE_I
> >> > +  [(match_dup 4)
> >> > +   (minus:SVE_I
> >> > + (USMAX:SVE_I
> >> > +   (match_operand:SVE_I 1 "register_operand" "w")
> >> > +   (match_operand:SVE_I 2 "register_operand" "w"))
> >> > + (match_operator 3 "aarch64_"
> >> > +   [(match_dup 1)
> >> > +(match_dup 2)]))]
> >> > +  UNSPEC_MERGE_PTRUE))]
> >> > +  "TARGET_SVE"
> >> > +  {
> >> > +operands[4] = force_reg (mode, CONSTM1_RTX
> >> (mode));
> >> > +  }
> >> > +)
> >>
> >> I think we should go directly from abd_3 to the final
> >> pattern, so that abd_3 does the force_reg too.  This would
> make...
> >>
> >> > +;; Predicated integer absolute difference.
> >> > +(define_insn "*aarch64_abd_3"
> >>
> >> ...this the named pattern, instead of starting with "*".
> >>
> >> > +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?")
> >> > +(unspec:SVE_I
> >> > +  [(match_operand: 1 "register_operand" "Upl, Upl")
> >> > +   (minus:SVE_I
> >> > + (USMAX:SVE_I
> >> > +   (match_operand:SVE_I 2 "register_operand" "w, w")
> >>
> >> Should be "0, w", so that the first alternative ties the input to the 
> >> output.
> >>
> >> > +   (match_operand:SVE_I 3 "register_operand" "w, w"))
> >> > + (match_operator 4 "aarch64_"
> >> > +   [(match_dup 2)
> >> > +(match_dup 3)]))]
> >> > +  UNSPEC_MERGE_PTRUE))]
> >> > +  "TARGET_SVE"
> >> > +  "@
> >> > +   abd\t%0., %1/m, %0., %3.
> >> > +
> >>
> movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3. >> >"
> >> > +  [(set_attr "movprfx" "*,yes")]
> >> > +)
> >> > +
> >> > +;; Emit a sequence to produce a sum-of-absolute-differences of the
> >> > +inputs in ;; operands 1 and 2.  The sequence also has to perform a
> >> > +widening reduction of ;; the difference into a vector and
> >> > +accumulate that into operand 3 before ;; copying that into the result
> operand 0.
> >> > +;; Perform that with a sequence of:
> >> > +;; MOV  ones.b, #1
> >> > +;; UABD diff.b, p0/m, op1.b, op2.b
> >> > +;; UDOT op3.s, diff.b, ones.b
> >> > +;; MOV  op0, op3  // should be eliminated in later passes.
> >> > +;; The signed version just uses the signed variants of the above
> >> instructions.
> >>
> >> Think it would be clearer if we removed the last line and just used
> >> [SU]ABD instead of UABD, since that's the only sign-dependent part of the
> operation.
> >> Also think we should SVEize it with MOVPRFX, since a separate MOV
> >> should never be needed:
> >>
> >> ;; MOV ones.b, #1
> >> ;; [SU]ABD diff.b, ptrue/m, op1.b, op2.b
> >> ;; MOVPRFX op0, op3// If necessary
> >> ;; UDOTop0.s, diff.b, ones.b
> >>
> >> > +(define_expand "sad"
> >> > +  [(use (match_operand:SVE_SDI 0 "register_operand"))
> >> > +   (unspec: [(use (match_operand: 1
> "register_operand"))
> >> > +(use (match_operand: 2 "register_operand"))]
> >> ABAL)
> >> 

Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-05-07 Thread Richard Sandiford
Alejandro Martinez Vicente  writes:
> Thanks for your comments Richard. I think this patch addresses them.

Yeah, this is OK to install, thanks.

Richard

>
> Alejandro
>
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: 07 May 2019 15:46
>> To: Alejandro Martinez Vicente 
>> Cc: James Greenhalgh ; GCC Patches > patc...@gcc.gnu.org>; nd ; Richard Biener
>> 
>> Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
>> 
>> Alejandro Martinez Vicente  writes:
>> > +;; Helper expander for aarch64_abd_3 to save the callers ;;
>> > +the hassle of constructing the other arm of the MINUS.
>> > +(define_expand "abd_3"
>> > +  [(use (match_operand:SVE_I 0 "register_operand"))
>> > +   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
>> > +  (match_operand:SVE_I 2 "register_operand"))]
>> > +  "TARGET_SVE"
>> > +  {
>> > +rtx other_arm
>> > +  = simplify_gen_binary (, mode, operands[1],
>> > +operands[2]);
>> 
>> I realise this is just copied from the Advanced SIMD version, but
>> simplify_gen_binary is a bit dangerous here, since we explicitly want an
>> unsimplified  with the two operands given.  Probably better as:
>> 
>>   gen_rtx_ (mode, ...)
>> 
>> > +emit_insn (gen_aarch64_abd_3 (operands[0], operands[1],
>> > + operands[2], other_arm));
>> > +DONE;
>> > +  }
>> > +)
>> > +
>> > +;; Unpredicated integer absolute difference.
>> > +(define_expand "aarch64_abd_3"
>> > +  [(set (match_operand:SVE_I 0 "register_operand")
>> > +  (unspec:SVE_I
>> > +[(match_dup 4)
>> > + (minus:SVE_I
>> > +   (USMAX:SVE_I
>> > + (match_operand:SVE_I 1 "register_operand" "w")
>> > + (match_operand:SVE_I 2 "register_operand" "w"))
>> > +   (match_operator 3 "aarch64_"
>> > + [(match_dup 1)
>> > +  (match_dup 2)]))]
>> > +UNSPEC_MERGE_PTRUE))]
>> > +  "TARGET_SVE"
>> > +  {
>> > +operands[4] = force_reg (mode, CONSTM1_RTX
>> (mode));
>> > +  }
>> > +)
>> 
>> I think we should go directly from abd_3 to the final pattern, so
>> that abd_3 does the force_reg too.  This would make...
>> 
>> > +;; Predicated integer absolute difference.
>> > +(define_insn "*aarch64_abd_3"
>> 
>> ...this the named pattern, instead of starting with "*".
>> 
>> > +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?")
>> > +  (unspec:SVE_I
>> > +[(match_operand: 1 "register_operand" "Upl, Upl")
>> > + (minus:SVE_I
>> > +   (USMAX:SVE_I
>> > + (match_operand:SVE_I 2 "register_operand" "w, w")
>> 
>> Should be "0, w", so that the first alternative ties the input to the output.
>> 
>> > + (match_operand:SVE_I 3 "register_operand" "w, w"))
>> > +   (match_operator 4 "aarch64_"
>> > + [(match_dup 2)
>> > +  (match_dup 3)]))]
>> > +UNSPEC_MERGE_PTRUE))]
>> > +  "TARGET_SVE"
>> > +  "@
>> > +   abd\t%0., %1/m, %0., %3.
>> > +
>> movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3.> >"
>> > +  [(set_attr "movprfx" "*,yes")]
>> > +)
>> > +
>> > +;; Emit a sequence to produce a sum-of-absolute-differences of the
>> > +inputs in ;; operands 1 and 2.  The sequence also has to perform a
>> > +widening reduction of ;; the difference into a vector and accumulate
>> > +that into operand 3 before ;; copying that into the result operand 0.
>> > +;; Perform that with a sequence of:
>> > +;; MOVones.b, #1
>> > +;; UABD   diff.b, p0/m, op1.b, op2.b
>> > +;; UDOT   op3.s, diff.b, ones.b
>> > +;; MOVop0, op3  // should be eliminated in later passes.
>> > +;; The signed version just uses the signed variants of the above
>> instructions.
>> 
>> Think it would be clearer if we removed the last line and just used [SU]ABD
>> instead of UABD, since that's the only sign-dependent part of the operation.
>> Also think we should SVEize it with MOVPRFX, since a separate MOV should
>> never be needed:
>> 
>> ;; MOV   ones.b, #1
>> ;; [SU]ABD   diff.b, ptrue/m, op1.b, op2.b
>> ;; MOVPRFX   op0, op3// If necessary
>> ;; UDOT  op0.s, diff.b, ones.b
>> 
>> > +(define_expand "sad"
>> > +  [(use (match_operand:SVE_SDI 0 "register_operand"))
>> > +   (unspec: [(use (match_operand: 1 "register_operand"))
>> > +  (use (match_operand: 2 "register_operand"))]
>> ABAL)
>> > +   (use (match_operand:SVE_SDI 3 "register_operand"))]
>> > +  "TARGET_SVE"
>> > +  {
>> > +rtx ones = force_reg (mode, CONST1_RTX (mode));
>> > +rtx diff = gen_reg_rtx (mode);
>> > +emit_insn (gen_abd_3 (diff, operands[1], operands[2]));
>> > +emit_insn (gen_udot_prod (operands[3], diff, ones,
>> operands[3]));
>> > +emit_move_insn (operands[0], operands[3]);
>> 
>> It would be better to make operands[0] the destination of the UDOT and
>> drop the move.
>> 
>> Thanks,
>> Richard
>> 
>> > +DONE;
>> > +  }
>> > +)
>> > diff --git a/gcc/config/aarch64/iterators.md
>> > b/gcc/config/aarch64/iterators.md index b3b2d6e..20aa0e9 100644
>> > --- 

GCC 8 backports

2019-05-07 Thread Marek Polacek
I'm backporting these 5 patches to gcc-8.

Marek
2019-02-27  Marek Polacek  

PR c++/88857 - ICE with value-initialization of argument in template.
* call.c (convert_like_real): Don't call build_value_init in template.

--- gcc/cp/call.c
+++ gcc/cp/call.c
@@ -7005,7 +7005,8 @@ convert_like_real (conversion *convs, tree expr, tree fn, 
int argnum,
/* If we're initializing from {}, it's value-initialization.  */
if (BRACE_ENCLOSED_INITIALIZER_P (expr)
&& CONSTRUCTOR_NELTS (expr) == 0
-   && TYPE_HAS_DEFAULT_CONSTRUCTOR (totype))
+   && TYPE_HAS_DEFAULT_CONSTRUCTOR (totype)
+   && !processing_template_decl)
  {
bool direct = CONSTRUCTOR_IS_DIRECT_INIT (expr);
if (abstract_virtuals_error_sfinae (NULL_TREE, totype, complain))
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/initlist-value4.C
@@ -0,0 +1,12 @@
+// PR c++/88857
+// { dg-do compile { target c++11 } }
+
+class S { int a; };
+void foo (const S &, int);
+
+template 
+void
+bar ()
+{
+  foo ({}); // { dg-error "too few arguments to function" }
+}
2019-03-25  Marek Polacek  
 
PR c++/89214 - ICE when initializing aggregates with bases.
* typeck2.c (digest_init_r): Warn about object slicing instead of
crashing.

--- gcc/cp/typeck2.c
+++ gcc/cp/typeck2.c
@@ -1209,8 +1209,29 @@ digest_init_r (tree type, tree init, int nested, int 
flags,
 {
   tree elt = CONSTRUCTOR_ELT (stripped_init, 0)->value;
   if (reference_related_p (type, TREE_TYPE (elt)))
-   /* We should have fixed this in reshape_init.  */
-   gcc_unreachable ();
+   {
+ /* In C++17, aggregates can have bases, thus participate in
+aggregate initialization.  In the following case:
+
+  struct B { int c; };
+  struct D : B { };
+  D d{{D{{42}}}};
+
+   there's an extra set of braces, so the D temporary initializes
+   the first element of d, which is the B base subobject.  The base
+   of type B is copy-initialized from the D temporary, causing
+   object slicing.  */
+ tree field = next_initializable_field (TYPE_FIELDS (type));
+ if (field && DECL_FIELD_IS_BASE (field))
+   {
+ if (warning_at (loc, 0, "initializing a base class of type %qT "
+ "results in object slicing", TREE_TYPE (field)))
+   inform (loc, "remove %<{ }%> around initializer");
+   }
+ else
+   /* We should have fixed this in reshape_init.  */
+   gcc_unreachable ();
+   }
 }
 
   if (BRACE_ENCLOSED_INITIALIZER_P (stripped_init)
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1z/aggr-base8.C
@@ -0,0 +1,48 @@
+// PR c++/89214
+// { dg-do compile { target c++17 } }
+
+struct A
+{
+  A (int);
+};
+
+struct BB
+{
+  A a;
+};
+
+struct B : BB
+{
+};
+
+void
+foo ()
+{
+  B b1 = {42};
+  B b2 = {{42}};
+  B b3 = {{{42}}};
+
+  B b4 = B{42};
+  B b5 = B{{42}};
+  B b6 = B{{{42}}};
+
+  B b7 = {B{42}};
+  B b8 = {B{{42}}};
+  B b9 = {B{{{42}}}};
+
+  B b10 = {{B{42}}}; // { dg-warning "initializing a base class of type .BB. 
results in object slicing" }
+  B b11 = {{B{{42}}}}; // { dg-warning "initializing a base class of type .BB. 
results in object slicing" }
+  B b12 = {{B{{{42}}}}}; // { dg-warning "initializing a base class of type 
.BB. results in object slicing" }
+
+  B bb1{42};
+  B bb2{{42}};
+  B bb3{{{42}}};
+
+  B bb7{B{42}};
+  B bb8{B{{42}}};
+  B bb9{B{{{42}}}};
+
+  B bb10{{B{42}}}; // { dg-warning "initializing a base class of type .BB. 
results in object slicing" }
+  B bb11{{B{{42}}}}; // { dg-warning "initializing a base class of type .BB. 
results in object slicing" }
+  B bb12{{B{{{42}}}}}; // { dg-warning "initializing a base class of type .BB. 
results in object slicing" }
+}
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1z/aggr-base9.C
@@ -0,0 +1,33 @@
+// PR c++/89214
+// { dg-do compile { target c++17 } }
+
+struct B {
+  int c;
+};
+
+struct D : B { };
+
+void
+foo ()
+{
+  D d1 = {42};
+  D d2 = {{42}};
+  
+  D d4 = D{42};
+  D d5 = D{{42}};
+ 
+  D d7 = {D{42}};
+  D d8 = {D{{42}}};
+
+  D d10 = {{D{42}}}; // { dg-warning "initializing a base class of type .B. 
results in object slicing" }
+  D d11 = {{D{{42}}}}; // { dg-warning "initializing a base class of type .B. 
results in object slicing" }
+
+  D dd1{42};
+  D dd2{{42}};
+  
+  D dd7{D{42}};
+  D dd8{D{{42}}};
+
+  D dd10{{D{42}}}; // { dg-warning "initializing a base class of type .B. 
results in object slicing" }
+  D dd11{{D{{42}}}}; // { dg-warning "initializing a base class of type .B. 
results in object slicing" }
+}
2019-02-27  Marek Polacek  

PR c++/89511 - ICE with using-declaration and unscoped enumerator.
* parser.c (cp_parser_using_declaration): For an unscoped enum
only use its context if it's not a function declaration.

--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -19412,7 

RE: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-05-07 Thread Alejandro Martinez Vicente
Thanks for your comments Richard. I think this patch addresses them.

Alejandro

> -Original Message-
> From: Richard Sandiford 
> Sent: 07 May 2019 15:46
> To: Alejandro Martinez Vicente 
> Cc: James Greenhalgh ; GCC Patches  patc...@gcc.gnu.org>; nd ; Richard Biener
> 
> Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> 
> Alejandro Martinez Vicente  writes:
> > +;; Helper expander for aarch64_abd_3 to save the callers ;;
> > +the hassle of constructing the other arm of the MINUS.
> > +(define_expand "abd_3"
> > +  [(use (match_operand:SVE_I 0 "register_operand"))
> > +   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
> > +   (match_operand:SVE_I 2 "register_operand"))]
> > +  "TARGET_SVE"
> > +  {
> > +rtx other_arm
> > +  = simplify_gen_binary (, mode, operands[1],
> > +operands[2]);
> 
> I realise this is just copied from the Advanced SIMD version, but
> simplify_gen_binary is a bit dangerous here, since we explicitly want an
> unsimplified  with the two operands given.  Probably better as:
> 
>   gen_rtx_ (mode, ...)
> 
> > +emit_insn (gen_aarch64_abd_3 (operands[0], operands[1],
> > +  operands[2], other_arm));
> > +DONE;
> > +  }
> > +)
> > +
> > +;; Unpredicated integer absolute difference.
> > +(define_expand "aarch64_abd_3"
> > +  [(set (match_operand:SVE_I 0 "register_operand")
> > +   (unspec:SVE_I
> > + [(match_dup 4)
> > +  (minus:SVE_I
> > +(USMAX:SVE_I
> > +  (match_operand:SVE_I 1 "register_operand" "w")
> > +  (match_operand:SVE_I 2 "register_operand" "w"))
> > +(match_operator 3 "aarch64_"
> > +  [(match_dup 1)
> > +   (match_dup 2)]))]
> > + UNSPEC_MERGE_PTRUE))]
> > +  "TARGET_SVE"
> > +  {
> > +operands[4] = force_reg (mode, CONSTM1_RTX
> (mode));
> > +  }
> > +)
> 
> I think we should go directly from abd_3 to the final pattern, so
> that abd_3 does the force_reg too.  This would make...
> 
> > +;; Predicated integer absolute difference.
> > +(define_insn "*aarch64_abd_3"
> 
> ...this the named pattern, instead of starting with "*".
> 
> > +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?")
> > +   (unspec:SVE_I
> > + [(match_operand: 1 "register_operand" "Upl, Upl")
> > +  (minus:SVE_I
> > +(USMAX:SVE_I
> > +  (match_operand:SVE_I 2 "register_operand" "w, w")
> 
> Should be "0, w", so that the first alternative ties the input to the output.
> 
> > +  (match_operand:SVE_I 3 "register_operand" "w, w"))
> > +(match_operator 4 "aarch64_"
> > +  [(match_dup 2)
> > +   (match_dup 3)]))]
> > + UNSPEC_MERGE_PTRUE))]
> > +  "TARGET_SVE"
> > +  "@
> > +   abd\t%0., %1/m, %0., %3.
> > +
> movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3. >"
> > +  [(set_attr "movprfx" "*,yes")]
> > +)
> > +
> > +;; Emit a sequence to produce a sum-of-absolute-differences of the
> > +inputs in ;; operands 1 and 2.  The sequence also has to perform a
> > +widening reduction of ;; the difference into a vector and accumulate
> > +that into operand 3 before ;; copying that into the result operand 0.
> > +;; Perform that with a sequence of:
> > +;; MOV ones.b, #1
> > +;; UABDdiff.b, p0/m, op1.b, op2.b
> > +;; UDOTop3.s, diff.b, ones.b
> > +;; MOV op0, op3  // should be eliminated in later passes.
> > +;; The signed version just uses the signed variants of the above
> instructions.
> 
> Think it would be clearer if we removed the last line and just used [SU]ABD
> instead of UABD, since that's the only sign-dependent part of the operation.
> Also think we should SVEize it with MOVPRFX, since a separate MOV should
> never be needed:
> 
> ;; MOVones.b, #1
> ;; [SU]ABDdiff.b, ptrue/m, op1.b, op2.b
> ;; MOVPRFXop0, op3// If necessary
> ;; UDOT   op0.s, diff.b, ones.b
> 
> > +(define_expand "sad"
> > +  [(use (match_operand:SVE_SDI 0 "register_operand"))
> > +   (unspec: [(use (match_operand: 1 "register_operand"))
> > +   (use (match_operand: 2 "register_operand"))]
> ABAL)
> > +   (use (match_operand:SVE_SDI 3 "register_operand"))]
> > +  "TARGET_SVE"
> > +  {
> > +rtx ones = force_reg (mode, CONST1_RTX (mode));
> > +rtx diff = gen_reg_rtx (mode);
> > +emit_insn (gen_abd_3 (diff, operands[1], operands[2]));
> > +emit_insn (gen_udot_prod (operands[3], diff, ones,
> operands[3]));
> > +emit_move_insn (operands[0], operands[3]);
> 
> It would be better to make operands[0] the destination of the UDOT and
> drop the move.
> 
> Thanks,
> Richard
> 
> > +DONE;
> > +  }
> > +)
> > diff --git a/gcc/config/aarch64/iterators.md
> > b/gcc/config/aarch64/iterators.md index b3b2d6e..20aa0e9 100644
> > --- a/gcc/config/aarch64/iterators.md
> > +++ b/gcc/config/aarch64/iterators.md
> > @@ -1060,6 +1060,9 @@
> >  ;; Map smax to smin and umax to umin.
> >  (define_code_attr max_opp [(smax "smin") (umax "umin")])
> >

Re: [gcc-10 PATCH, i386]: Use accessible_reg_set to disable unsupported register sets

2019-05-07 Thread Uros Bizjak
On Wed, Nov 28, 2018 at 12:38 PM Richard Sandiford
 wrote:
>
> Uros Bizjak  writes:
> > diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
> > index 21bdcdaeaa35..691e0c7c1b0b 100644
> > --- a/gcc/cfgexpand.c
> > +++ b/gcc/cfgexpand.c
> > @@ -2981,6 +2981,14 @@ expand_asm_stmt (gasm *stmt)
> >  regname);
> >   return;
> > }
> > + else if (!in_hard_reg_set_p
> > +  (accessible_reg_set, reg_raw_mode[reg], reg))
> > +   {
> > + /* ??? Diagnose during gimplification?  */
> > + error ("the register %qs cannot be clobbered in %<asm%>"
> > +" for the current target", regname);
> > + return;
> > +   }
> >
> >   SET_HARD_REG_BIT (clobbered_regs, reg);
> >   rtx x = gen_rtx_REG (reg_raw_mode[reg], reg);
>
> There's an argument that clobbering an inaccessible register should
> simply be a no-op, so that e.g. you can describe a syscall ABI's effect
> on FP registers without having to check whether FP registers are
> disabled.  But if an asm explicitly uses the clobbered register,
> it would be surprising not to at least warn.  Since we can't reliably
> distinguish the two cases, I agree going for the safest is best.
>
> So OK (for GCC 10, like you say) if no-one objects in the meantime.

Now committed.

Uros.


[PATCH] Fix incorrect DR numbers in libstdc++ manual

2019-05-07 Thread Jonathan Wakely

* doc/xml/manual/intro.xml: Fix DR 2537 and DR 2566 confusion.

Committed to trunk.

commit 99929bcbbb2c8e0234dbdaf11866d2399f1f039e
Author: Jonathan Wakely 
Date:   Tue May 7 15:41:00 2019 +0100

Fix incorrect DR numbers in libstdc++ manual

* doc/xml/manual/intro.xml: Fix DR 2537 and DR 2566 confusion.

diff --git a/libstdc++-v3/doc/xml/manual/intro.xml 
b/libstdc++-v3/doc/xml/manual/intro.xml
index 9761b82fd65..518981559a0 100644
--- a/libstdc++-v3/doc/xml/manual/intro.xml
+++ b/libstdc++-v3/doc/xml/manual/intro.xml
@@ -1121,13 +1121,6 @@ requirements of the license of GCC.
 
 
 http://www.w3.org/1999/xlink; xlink:href="#2537">2537:
-   Requirements on the first template parameter of container 
adaptors
-   
-
-Add static assertions to enforce the requirement.
-
-
-http://www.w3.org/1999/xlink; xlink:href="#2566">2566:
Constructors for priority_queue taking allocators
 should call make_heap

@@ -1135,6 +1128,13 @@ requirements of the license of GCC.
 Call make_heap.
 
 
+http://www.w3.org/1999/xlink; xlink:href="#2566">2566:
+   Requirements on the first template parameter of container 
adaptors
+   
+
+Add static assertions to enforce the requirement.
+
+
 http://www.w3.org/1999/xlink; xlink:href="#2583">2583:
There is no way to supply an allocator for 
basic_string(str, pos)



[PATCH] Improve API docs for std::regex

2019-05-07 Thread Jonathan Wakely

* include/bits/regex.h: Improve docs.
* include/bits/regex.tcc: Do not document implementation details.

Tested powerpc64le-linux. Committed to trunk.

commit abbe3b64701718dc3327039127a07547391e29d2
Author: Jonathan Wakely 
Date:   Tue May 7 15:10:12 2019 +0100

Improve API docs for std::regex

* include/bits/regex.h: Improve docs.
* include/bits/regex.tcc: Do not document implementation details.

diff --git a/libstdc++-v3/include/bits/regex.h 
b/libstdc++-v3/include/bits/regex.h
index 7576cd7a9c0..76219b5ae3b 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -838,6 +838,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
* @brief Swaps the contents of two regular expression objects.
* @param __lhs First regular expression.
* @param __rhs Second regular expression.
+   * @relates basic_regex
*/
   template
 inline void
@@ -937,10 +938,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   { return this->_M_str().compare(__s); }
   // @}
 
+  /// @cond undocumented
   // Non-standard, used by comparison operators
   int
   _M_compare(const value_type* __s, size_t __n) const
   { return this->_M_str().compare({__s, __n}); }
+  /// @endcond
 
 private:
   // Simplified basic_string_view for C++11
@@ -1015,6 +1018,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
   // [7.9.2] sub_match non-member operators
 
+  /// @relates sub_match @{
+
   /**
* @brief Tests the equivalence of two regular expression submatches.
* @param __lhs First regular expression submatch.
@@ -1081,11 +1086,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 operator>(const sub_match<_BiIter>& __lhs, const sub_match<_BiIter>& __rhs)
 { return __lhs.compare(__rhs) > 0; }
 
+  /// @cond undocumented
+
   // Alias for a basic_string that can be compared to a sub_match.
   template
 using __sub_match_string = basic_string<
  typename iterator_traits<_Bi_iter>::value_type,
  _Ch_traits, _Ch_alloc>;
+  /// @endcond
 
   /**
* @brief Tests the equivalence of a string and a regular expression
@@ -1554,6 +1562,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   const sub_match<_Bi_iter>& __m)
 { return __os << __m.str(); }
 
+  // @} relates sub_match
+
   // [7.10] Class template match_results
 
   /**
@@ -1575,8 +1585,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
* of characters [first, second) which formed that match. Otherwise matched
* is false, and members first and second point to the end of the sequence
* that was searched.
-   *
-   * @nosubgrouping
*/
   template > >
@@ -1606,7 +1614,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
 public:
   /**
-   * @name 10.? Public Types
+   * @name 28.10 Public Types
*/
   //@{
   typedef sub_match<_Bi_iter> value_type;
@@ -1630,17 +1638,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   /**
* @brief Constructs a default %match_results container.
* @post size() returns 0 and str() returns an empty string.
-   * @{
*/
   match_results() : match_results(_Alloc()) { }
 
+  /**
+   * @brief Constructs a default %match_results container.
+   * @post size() returns 0 and str() returns an empty string.
+   */
   explicit
   match_results(const _Alloc& __a) noexcept
   : _Base_type(__a)
   { }
 
-  // @}
-
   /**
* @brief Copy constructs a %match_results.
*/
@@ -1712,7 +1721,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   //@}
 
   /**
-   * @name 10.3 Element Access
+   * @name 28.10.4 Element Access
*/
   //@{
 
@@ -1837,7 +1846,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   //@}
 
   /**
-   * @name 10.4 Formatting
+   * @name 28.10.5 Formatting
*
* These functions perform formatted substitution of the matched
* character sequences into their target.  The format specifiers and
@@ -1898,7 +1907,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   //@}
 
   /**
-   * @name 10.5 Allocator
+   * @name 28.10.6 Allocator
*/
   //@{
 
@@ -1912,7 +1921,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   //@}
 
   /**
-   * @name 10.6 Swap
+   * @name 28.10.7 Swap
*/
//@{
 
@@ -1929,12 +1938,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   //@}
 
 private:
-  template
-   friend class __detail::_Executor;
-
   template
friend class regex_iterator;
 
+  /// @cond undocumented
+
+  template
+   friend class __detail::_Executor;
+
   template
friend bool
@@ -1971,6 +1982,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   { return _Base_type::operator[](_Base_type::size() - 1); }
 
   _Bi_iter _M_begin;
+  /// @endcond
 };
 
   typedef match_resultscmatch;
@@ -1981,10 +1993,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 #endif
 
   // match_results comparisons
+
   /**
* 

Re: [v3 PATCH] Make stateful allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-07 Thread Jonathan Wakely

On 07/05/19 12:22 +0100, Jonathan Wakely wrote:

On 07/05/19 12:01 +0100, Nina Dinka Ranns wrote:

Ack. I've put the use of _Alloc_traits::is_always_equal within #if
__cplusplus >= 201703L block since it is officially a C++17 feature.
Let me know if you think that's an overkill.


Yes, that's overkill, we provide is_always_equal unconditionally from
C++11 onwards (to avoid ODR violations in code using different -std
options). Since it's defined for C++11 we can use it for C++11.

I can remove that #if and test and commit the result for you though,
no need for another revision of the patch.


New changelog below. I didn't change the description of
operator+(basic_string&&,basic_string&&) as it's still technically
always resulting in an allocator from the first parameter.


Yes, that looks fine. Thanks!


I realised that for !_GLIBCXX_USE_CXX11_ABI we can always use the
optimization, because we don't support stateful allocators for the old
std::string ABI. So I adjusted the patch slightly for that.

I've attached what I'll commit.

Tested powerpc64le-linux (old and new ABI), committing to trunk.


commit 409ae90af9472e649df14c8d8147225d1cfa20ea
Author: Jonathan Wakely 
Date:   Tue May 7 15:21:47 2019 +0100

Make allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-01  Nina Dinka Ranns  

Make allocator propagation more consistent for
operator+(basic_string) (P1165R1)
* include/bits/basic_string.h
(operator+(basic_string&&, basic_string&&): Changed resulting
allocator to always be the one from the first parameter.
* include/bits/basic_string.tcc
(operator+(const _CharT*, const basic_string&)): Changed
resulting allocator to be SOCCC on the second parameter's allocator.
(operator+(_CharT, const basic_string&)): Likewise.
* testsuite/21_strings/basic_string/allocator/char/operator_plus.cc:
New.
* testsuite/21_strings/basic_string/allocator/wchar_t/operator_plus.cc:
New.

diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index 4a6332a8968..5ebe86bad7d 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -6096,11 +6096,21 @@ _GLIBCXX_END_NAMESPACE_CXX11
 operator+(basic_string<_CharT, _Traits, _Alloc>&& __lhs,
 	  basic_string<_CharT, _Traits, _Alloc>&& __rhs)
 {
-  const auto __size = __lhs.size() + __rhs.size();
-  const bool __cond = (__size > __lhs.capacity()
-			   && __size <= __rhs.capacity());
-  return __cond ? std::move(__rhs.insert(0, __lhs))
-	: std::move(__lhs.append(__rhs));
+#if _GLIBCXX_USE_CXX11_ABI
+  using _Alloc_traits = allocator_traits<_Alloc>;
+  bool __use_rhs = false;
+  if _GLIBCXX17_CONSTEXPR (typename _Alloc_traits::is_always_equal{})
+	__use_rhs = true;
+  else if (__lhs.get_allocator() == __rhs.get_allocator())
+	__use_rhs = true;
+  if (__use_rhs)
+#endif
+	{
+	  const auto __size = __lhs.size() + __rhs.size();
+	  if (__size > __lhs.capacity() && __size <= __rhs.capacity())
+	return std::move(__rhs.insert(0, __lhs));
+	}
+  return std::move(__lhs.append(__rhs));
 }
 
   template
diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index 314b8fe207f..e2315cb1467 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1161,8 +1161,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __glibcxx_requires_string(__lhs);
   typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
   typedef typename __string_type::size_type	  __size_type;
+  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
+	rebind<_CharT>::other _Char_alloc_type;
+  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
   const __size_type __len = _Traits::length(__lhs);
-  __string_type __str;
+  __string_type __str(_Alloc_traits::_S_select_on_copy(
+  __rhs.get_allocator()));
   __str.reserve(__len + __rhs.size());
   __str.append(__lhs, __len);
   __str.append(__rhs);
@@ -1175,7 +1179,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
   typedef typename __string_type::size_type	  __size_type;
-  __string_type __str;
+  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
+	rebind<_CharT>::other _Char_alloc_type;
+  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
+  __string_type __str(_Alloc_traits::_S_select_on_copy(
+  __rhs.get_allocator()));
   const __size_type __len = __rhs.size();
   __str.reserve(__len + 1);
   __str.append(__size_type(1), __lhs);
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/allocator/char/operator_plus.cc 

Re: [PATCH] Implement LWG 2686, hash

2019-05-07 Thread Jonathan Wakely

On 04/05/19 15:36 +0100, Jonathan Wakely wrote:

On 03/05/19 23:42 +0100, Jonathan Wakely wrote:

On 23/03/17 17:49 +, Jonathan Wakely wrote:

On 12/03/17 13:16 +0100, Daniel Krügler wrote:

The following is an *untested* patch suggestion, please verify.

Notes: My interpretation is that hash<error_condition> should be
defined outside of the _GLIBCXX_COMPATIBILITY_CXX0X block, please
double-check that course of action.


That's right.


I noticed that the preexisting hash<error_code> did directly refer to
the private members of error_code albeit those have public access
functions. For consistency I mimicked that existing style when
implementing hash<error_condition>.


I see no reason for that, so I've removed the friend declaration and
used the public member functions.


I'm going to do the same for hash<error_code> too. It can also use the
public members instead of being a friend.



Although this is a DR, I'm treating it as a new C++17 feature, so I've
adjusted the patch to only add the new specialization for C++17 mode.
We're too close to the GCC 7 release to be adding new things to the
default mode, even minor things like this. After GCC 7 is released we
can revisit it and decide if we want to enable it for all modes.


We never revisited that, and it's still only enabled for C++17 and up.
I guess that's OK, but we could enable it for C++11 and 14 on trunk
if we want. Anybody care enough to argue for that?


Here's what I've tested and will be committing.





commit 90ca0fd91f5c65af370beb20af06bdca257aaf63
Author: Jonathan Wakely 
Date:   Thu Mar 23 11:47:39 2017 +

 Implement LWG 2686, std::hash, for C++17
 2017-03-23  Daniel Kruegler  
Implement LWG 2686, Why is std::hash specialized for error_code,
but not error_condition?
* include/std/system_error (hash): Define for C++17.
* testsuite/20_util/hash/operators/size_t.cc (hash):
Instantiate test for error_condition.
* testsuite/20_util/hash/requirements/explicit_instantiation.cc
(hash): Instantiate hash.


I'm adding a similar test for hash too.

Tested powerpc64le-linux, committing to trunk shortly.


commit 4034dddf0dbfc20ff9069602a419a95b09de20f6
Author: Jonathan Wakely 
Date:   Tue May 7 11:09:00 2019 +0100

Add test for std::hash

Copied from 19_diagnostics/error_condition/hash.cc added recently.

* testsuite/19_diagnostics/error_code/hash.cc: New test.

diff --git a/libstdc++-v3/testsuite/19_diagnostics/error_code/hash.cc b/libstdc++-v3/testsuite/19_diagnostics/error_code/hash.cc
new file mode 100644
index 000..2014e676878
--- /dev/null
+++ b/libstdc++-v3/testsuite/19_diagnostics/error_code/hash.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do run { target c++11 } }
+
+#include 
+#include 
+
+struct error_cat : std::error_category
+{
+  error_cat(std::string s) : _name(s) { }
+  std::string _name;
+  const char* name() const noexcept override { return _name.c_str(); }
+  std::string message(int) const override { return "error"; }
+};
+
+void
+test01()
+{
+  std::hash h;
+  error_cat kitty("kitty"), moggy("moggy");
+  std::error_code cond1(99, kitty);
+  VERIFY( h(cond1) == h(cond1) );
+  std::error_code cond2(99, kitty);
+  VERIFY( h(cond1) == h(cond2) );
+  std::error_code cond3(88, kitty);
+  VERIFY( h(cond1) != h(cond3) );
+  std::error_code cond4(99, moggy);
+  VERIFY( h(cond1) != h(cond4) );
+}
+
+int
+main()
+{
+  test01();
+}


Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-05-07 Thread Richard Sandiford
Alejandro Martinez Vicente  writes:
> +;; Helper expander for aarch64_abd_3 to save the callers
> +;; the hassle of constructing the other arm of the MINUS.
> +(define_expand "abd_3"
> +  [(use (match_operand:SVE_I 0 "register_operand"))
> +   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
> + (match_operand:SVE_I 2 "register_operand"))]
> +  "TARGET_SVE"
> +  {
> +rtx other_arm
> +  = simplify_gen_binary (, mode, operands[1], 
> operands[2]);

I realise this is just copied from the Advanced SIMD version, but
simplify_gen_binary is a bit dangerous here, since we explicitly want an
unsimplified  with the two operands given.  Probably better as:

  gen_rtx_ (mode, ...)

> +emit_insn (gen_aarch64_abd_3 (operands[0], operands[1],
> +operands[2], other_arm));
> +DONE;
> +  }
> +)
> +
> +;; Unpredicated integer absolute difference.
> +(define_expand "aarch64_abd_3"
> +  [(set (match_operand:SVE_I 0 "register_operand")
> + (unspec:SVE_I
> +   [(match_dup 4)
> +(minus:SVE_I
> +  (USMAX:SVE_I
> +(match_operand:SVE_I 1 "register_operand" "w")
> +(match_operand:SVE_I 2 "register_operand" "w"))
> +  (match_operator 3 "aarch64_"
> +[(match_dup 1)
> + (match_dup 2)]))]
> +   UNSPEC_MERGE_PTRUE))]
> +  "TARGET_SVE"
> +  {
> +operands[4] = force_reg (mode, CONSTM1_RTX (mode));
> +  }
> +)

I think we should go directly from abd_3 to the final
pattern, so that abd_3 does the force_reg too.  This
would make...

> +;; Predicated integer absolute difference.
> +(define_insn "*aarch64_abd_3"

...this the named pattern, instead of starting with "*".

> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?")
> + (unspec:SVE_I
> +   [(match_operand: 1 "register_operand" "Upl, Upl")
> +(minus:SVE_I
> +  (USMAX:SVE_I
> +(match_operand:SVE_I 2 "register_operand" "w, w")

Should be "0, w", so that the first alternative ties the input
to the output.

> +(match_operand:SVE_I 3 "register_operand" "w, w"))
> +  (match_operator 4 "aarch64_"
> +[(match_dup 2)
> + (match_dup 3)]))]
> +   UNSPEC_MERGE_PTRUE))]
> +  "TARGET_SVE"
> +  "@
> +   abd\t%0., %1/m, %0., %3.
> +   movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3."
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
> +;; operands 1 and 2.  The sequence also has to perform a widening reduction 
> of
> +;; the difference into a vector and accumulate that into operand 3 before
> +;; copying that into the result operand 0.
> +;; Perform that with a sequence of:
> +;; MOV   ones.b, #1
> +;; UABD  diff.b, p0/m, op1.b, op2.b
> +;; UDOT  op3.s, diff.b, ones.b
> +;; MOV   op0, op3  // should be eliminated in later passes.
> +;; The signed version just uses the signed variants of the above 
> instructions.

Think it would be clearer if we removed the last line and just used
[SU]ABD instead of UABD, since that's the only sign-dependent part
of the operation.  Also think we should SVEize it with MOVPRFX,
since a separate MOV should never be needed:

;; MOV  ones.b, #1
;; [SU]ABD  diff.b, ptrue/m, op1.b, op2.b
;; MOVPRFX  op0, op3// If necessary
;; UDOT op0.s, diff.b, ones.b

> +(define_expand "sad"
> +  [(use (match_operand:SVE_SDI 0 "register_operand"))
> +   (unspec: [(use (match_operand: 1 "register_operand"))
> + (use (match_operand: 2 "register_operand"))] ABAL)
> +   (use (match_operand:SVE_SDI 3 "register_operand"))]
> +  "TARGET_SVE"
> +  {
> +rtx ones = force_reg (mode, CONST1_RTX (mode));
> +rtx diff = gen_reg_rtx (mode);
> +emit_insn (gen_abd_3 (diff, operands[1], operands[2]));
> +emit_insn (gen_udot_prod (operands[3], diff, ones, operands[3]));
> +emit_move_insn (operands[0], operands[3]);

It would be better to make operands[0] the destination of the UDOT and
drop the move.

Thanks,
Richard

> +DONE;
> +  }
> +)
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index b3b2d6e..20aa0e9 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1060,6 +1060,9 @@
>  ;; Map smax to smin and umax to umin.
>  (define_code_attr max_opp [(smax "smin") (umax "umin")])
>  
> +;; Same as above, but louder.
> +(define_code_attr MAX_OPP [(smax "SMIN") (umax "UMIN")])

:-)


Re: [PATCH][stage1] Support profile (BB counts and edge probabilities) in GIMPLE FE.

2019-05-07 Thread Richard Biener
On May 7, 2019 4:33:08 PM GMT+02:00, "Martin Liška"  wrote:
>On 5/7/19 2:56 PM, Richard Biener wrote:
>> But that can use the existing get_hot_bb_threshold since we never
>want
>> to dump -1 in case min_count was never initialized.
>
>Yes. But the function will call:
>
>get_hot_bb_threshold ()
>{
>  if (min_count == -1)
>{
>gcov_type t = profile_info->sum_max / PARAM_VALUE
>(HOT_BB_COUNT_FRACTION);
>  set_hot_bb_threshold (t);
>...
>
>which will cause a segfault in non-PGO run. Note that:
>static gcov_type min_count = -1;
>
>is a non-exported variable so that's why I simply added the getter.
>
>Hope it's fine as is?

Oh, I see. Hmm, so we should get away with no min_count when all counter kinds 
are non-pgo? 

Richard. 

>
>Martin



Re: [PATCH][stage1] Support profile (BB counts and edge probabilities) in GIMPLE FE.

2019-05-07 Thread Martin Liška
On 5/7/19 2:56 PM, Richard Biener wrote:
> But that can use the existing get_hot_bb_threshold since we never want
> to dump -1 in case min_count was never initialized.

Yes. But the function will call:

get_hot_bb_threshold ()
{
  if (min_count == -1)
{
  gcov_type t = profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION);
  set_hot_bb_threshold (t);
...

which will cause a segfault in non-PGO run. Note that:
static gcov_type min_count = -1;

is a non-exported variable so that's why I simply added the getter.

Hope it's fine as is?

Martin



Re: [libcpp] Reimplement mkdeps data structures

2019-05-07 Thread Christophe Lyon
Hi Nathan,

On Tue, 7 May 2019 at 14:39, Nathan Sidwell  wrote:
>
> This patch reimplements the header dependency data structures.  We can
> now use a vector class, rather than cut-n-paste 3 sets of bespoke
> C-style array handling.  Sadly, simply using vec.h didn't work, so I do
> have one internal vector class.
>
> The other change is that, rather than apply quoting on adding the
> dependencies, we apply it when writing them out.  This'll permit writing
> the dependencies in different forms (later).  Because we have both -MT
> already-quoted-target and -MQ apply-quote-target we need to remember
> which was used.  Fortunately, we record these targets first, so we can
> just record when we stopped adding already-quoted targets.
>


After your commit, I'm seeing an ICE while building glibc headers:
: internal compiler error: Segmentation fault
0xc2eeaf crash_signal

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/toplev.c:326
0x151ad0d munge

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:176
0x151adb6 make_write_name

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:320
0x151adb6 make_write_vec

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:348
0x151b24b make_write

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:367
0x151b24b deps_write(mkdeps const*, _IO_FILE*, bool, unsigned int)

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/mkdeps.c:381
0x150e007 cpp_finish(cpp_reader*, _IO_FILE*)

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libcpp/init.c:769
0x6f91f7 c_common_finish()

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/c-family/c-opts.c:1221
0x5ff089 finalize

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/toplev.c:2105
0x5ff089 do_compile

/tmp/2191418_6.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/toplev.c:2214
Please submit a full bug report,

while trying to build glibc-headers/bits/stdio_lim.st

(seen on aarch64)

Can you check?

Thanks,

Christophe

> nathan
> --
> Nathan Sidwell


RE: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-05-07 Thread Alejandro Martinez Vicente
Hi,

I updated the patch after the dot product went in. This is the new cover letter:

This patch adds support to vectorize sum of absolute differences (SAD_EXPR)
using SVE.

Given this input code:

int
sum_abs (uint8_t *restrict x, uint8_t *restrict y, int n)
{
  int sum = 0;

  for (int i = 0; i < n; i++)
{
  sum += __builtin_abs (x[i] - y[i]);
}

  return sum;
}

The resulting SVE code is:

 :
   0:   715fcmp w2, #0x0
   4:   5400026db.le50 
   8:   d283mov x3, #0x0// #0
   c:   93407c42sxtwx2, w2
  10:   2538c002mov z2.b, #0
  14:   25221fe0whilelo p0.b, xzr, x2
  18:   2538c023mov z3.b, #1
  1c:   2518e3e1ptrue   p1.b
  20:   a4034000ld1b{z0.b}, p0/z, [x0, x3]
  24:   a4034021ld1b{z1.b}, p0/z, [x1, x3]
  28:   0430e3e3incbx3
  2c:   0520c021sel z1.b, p0, z1.b, z0.b
  30:   25221c60whilelo p0.b, x3, x2
  34:   040d0420uabdz0.b, p1/m, z0.b, z1.b
  38:   44830402udotz2.s, z0.b, z3.b
  3c:   5421b.ne20   // b.any
  40:   2598e3e0ptrue   p0.s
  44:   04812042uaddv   d2, p0, z2.s
  48:   1e260040fmovw0, s2
  4c:   d65f03c0ret
  50:   1e2703e2fmovs2, wzr
  54:   1e260040fmovw0, s2
  58:   d65f03c0ret

Notice how udot is used inside a fully masked loop.

I tested this patch in an aarch64 machine bootstrapping the compiler and
running the checks.

Alejandro

gcc/Changelog:

2019-05-07  Alejandro Martinez  

* config/aarch64/aarch64-sve.md (abd_3): New define_expand.
(aarch64_abd_3): Likewise.
(*aarch64_abd_3): New define_insn.
(sad): New define_expand.
* config/aarch64/iterators.md: Added MAX_OPP attribute.
* tree-vect-loop.c (use_mask_by_cond_expr_p): Add SAD_EXPR.
(build_vect_cond_expr): Likewise.

gcc/testsuite/Changelog:
 
2019-05-07  Alejandro Martinez  

* gcc.target/aarch64/sve/sad_1.c: New test for sum of absolute
differences.

> -Original Message-
> From: gcc-patches-ow...@gcc.gnu.org 
> On Behalf Of Alejandro Martinez Vicente
> Sent: 11 February 2019 15:38
> To: James Greenhalgh 
> Cc: GCC Patches ; nd ; Richard
> Sandiford ; Richard Biener
> 
> Subject: RE: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> 
> > -Original Message-
> > From: James Greenhalgh 
> > Sent: 06 February 2019 17:42
> > To: Alejandro Martinez Vicente 
> > Cc: GCC Patches ; nd ; Richard
> > Sandiford ; Richard Biener
> > 
> > Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> >
> > On Mon, Feb 04, 2019 at 07:34:05AM -0600, Alejandro Martinez Vicente
> > wrote:
> > > Hi,
> > >
> > > This patch adds support to vectorize sum of absolute differences
> > > (SAD_EXPR) using SVE. It also uses the new functionality to ensure
> > > that the resulting loop is masked. Therefore, it depends on
> > >
> > > https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00016.html
> > >
> > > Given this input code:
> > >
> > > int
> > > sum_abs (uint8_t *restrict x, uint8_t *restrict y, int n) {
> > >   int sum = 0;
> > >
> > >   for (int i = 0; i < n; i++)
> > > {
> > >   sum += __builtin_abs (x[i] - y[i]);
> > > }
> > >
> > >   return sum;
> > > }
> > >
> > > The resulting SVE code is:
> > >
> > >  :
> > >0: 715fcmp w2, #0x0
> > >4: 5400026db.le50 
> > >8: d283mov x3, #0x0// #0
> > >c: 93407c42sxtwx2, w2
> > >   10: 2538c002mov z2.b, #0
> > >   14: 25221fe0whilelo p0.b, xzr, x2
> > >   18: 2538c023mov z3.b, #1
> > >   1c: 2518e3e1ptrue   p1.b
> > >   20: a4034000ld1b{z0.b}, p0/z, [x0, x3]
> > >   24: a4034021ld1b{z1.b}, p0/z, [x1, x3]
> > >   28: 0430e3e3incbx3
> > >   2c: 0520c021sel z1.b, p0, z1.b, z0.b
> > >   30: 25221c60whilelo p0.b, x3, x2
> > >   34: 040d0420uabdz0.b, p1/m, z0.b, z1.b
> > >   38: 44830402udotz2.s, z0.b, z3.b
> > >   3c: 5421b.ne20   // b.any
> > >   40: 2598e3e0ptrue   p0.s
> > >   44: 04812042uaddv   d2, p0, z2.s
> > >   48: 1e260040fmovw0, s2
> > >   4c: d65f03c0ret
> > >   50: 1e2703e2fmovs2, wzr
> > >   54: 1e260040fmovw0, s2
> > >   58: d65f03c0ret
> > >
> > > Notice how udot is used inside a fully masked loop.
> > >
> > > I tested this patch in an aarch64 machine bootstrapping the compiler
> > > and running the checks.
> >
> > This doesn't give us much confidence in SVE coverage; unless you have
> > been running in an environment using SVE by default? Do you have some
> > set of workloads you could 

Re: [PATCH] PR fortran/90166 -- Add check for module prefix

2019-05-07 Thread Steve Kargl
On Tue, May 07, 2019 at 10:44:34AM +0200, Dominique d'Humières wrote:
> Hi Steve,
> 
> > Ping.
> 
> AFAICT this has been committed as revision r270495.
> 

Whoops, you're correct.

I have a few too many diffs lying around.

-- 
Steve


Re: [PATCH] Eliminates phi on branch for CMP result

2019-05-07 Thread Richard Biener
On Mon, 6 May 2019, Jiufu Guo wrote:

> Hi,
> 
> This patch implements the optimization in PR77820.  The optimization
> eliminates phi and phi's basic block, if the phi is used only by
> condition branch, and the phi's incoming value is the result of a
> CMP.
> 
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> 
>   
>   p0 = a CMP b
>   goto ;
> 
>   
>   p1 = c CMP d
>   goto ;
> 
>   
>   # phi = PHI 
>   if (phi != 0) goto ; else goto ;
> 
> Transform to:
> 
>   
>   p0 = a CMP b
>   if (p0 != 0) goto ; else goto ;
> 
>   
>   p1 = c CMP d
>   if (p1 != 0) goto ; else goto ;
> 
> Bootstrapped and tested on powerpc64le with no regressions, and testcases were
> saved. Is this ok for trunk?

I'm not sure I like a new pass here ;)  The transform is basically
tail-duplicating the PHI block because the exit conditional can
be "simplified" - that's something jump threading generally does
though it relies on "simplified" being a little bit more simplified
than above.

I suspect this transform was implemented because of some benchmark?

I suspect the performance benefit is because of better branch
prediction by not mangling both conditional branches into one?

The transform is also somewhat similar to tail-duplication done
in path splitting or tracer.

The pass itself does quite strict pattern-matching but I wonder
if more cases should be handled this way.

Any specific reason for the pass placement between PRE and sinking?
tracer and path splitting run much later, jump threading runs
all over the place.

Thanks,
Richard.

> Thanks!
> 
> [gcc]
> 2019-05-06  Jiufu Guo  
>   Lijia He  
> 
>   PR tree-optimization/77820
>   * tree-ssa-mergephicmp.c: New file.
>   * Makefile.in (OBJS): Add tree-ssa-mergephicmp.o.
>   * common.opt (ftree-mergephicmp): New flag.
>   * passes.def (pass_mergephicmp): New pass.
>   * timevar.def (TV_TREE_MERGEPHICMP): New timevar.
>   * tree-pass.h: New file.
> 
> [gcc/testsuite]
> 2019-05-06  Jiufu Guo  
>   Lijia He  
> 
>   PR tree-optimization/77820
>   * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>   * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> 
> 
> ---
>  gcc/Makefile.in  |   1 +
>  gcc/common.opt   |   4 +
>  gcc/passes.def   |   1 +
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c |  31 +++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c |  31 +++
>  gcc/timevar.def  |   1 +
>  gcc/tree-pass.h  |   1 +
>  gcc/tree-ssa-mergephicmp.c   | 260 
> +++
>  8 files changed, 330 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/tree-ssa-mergephicmp.c
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index d186d71..9729501 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1567,6 +1567,7 @@ OBJS = \
>   tree-ssa-reassoc.o \
>   tree-ssa-sccvn.o \
>   tree-ssa-scopedtables.o \
> + tree-ssa-mergephicmp.o \
>   tree-ssa-sink.o \
>   tree-ssa-strlen.o \
>   tree-ssa-structalias.o \
> diff --git a/gcc/common.opt b/gcc/common.opt
> index d342c4f..5ea5ed2 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2702,6 +2702,10 @@ ftree-salias
>  Common Ignore
>  Does nothing.  Preserved for backward compatibility.
>  
> +ftree-mergephicmp
> +Common Report Var(flag_mergephicmp) Init(1) Optimization
> +Enable optimization on branch phi compare on trees.
> +
>  ftree-sink
>  Common Report Var(flag_tree_sink) Optimization
>  Enable SSA code sinking on trees.
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 446a7c4..e3a3913 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -249,6 +249,7 @@ along with GCC; see the file COPYING3.  If not see
>NEXT_PASS (pass_lim);
>NEXT_PASS (pass_walloca, false);
>NEXT_PASS (pass_pre);
> +  NEXT_PASS (pass_mergephicmp);
>NEXT_PASS (pass_sink_code);
>NEXT_PASS (pass_sancov);
>NEXT_PASS (pass_asan);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 000..2e3f4f6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-mergephicmp" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +{
> +  t = a < b;
> +}
> +  else if (d == a + b)
> +{
> +  t = c < d;
> +}
> +  else
> +{
> +  t = a == c;
> +}
> +
> + 

[PATCH] Handle more invariant compares in vectorization

2019-05-07 Thread Richard Biener


Another partial merge.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

>From 3daa15805e831d77d5f1420ec6543a67b1870119 Mon Sep 17 00:00:00 2001
From: Richard Guenther 
Date: Mon, 21 Jan 2019 13:48:32 +0100
Subject: [PATCH] allow-some-invariant-conds-with-SLP

* tree-vect-stmts.c (vect_is_simple_cond): When vectype is
not specified still compute a comp_vectype for invariant
compares.

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 247d4353cb5..0f4930e7ee6 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8862,11 +8862,12 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
 
   *comp_vectype = vectype1 ? vectype1 : vectype2;
   /* Invariant comparison.  */
-  if (! *comp_vectype && vectype)
+  if (! *comp_vectype)
 {
   tree scalar_type = TREE_TYPE (lhs);
   /* If we can widen the comparison to match vectype do so.  */
   if (INTEGRAL_TYPE_P (scalar_type)
+ && vectype
  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
  TYPE_SIZE (TREE_TYPE (vectype
scalar_type = build_nonstandard_integer_type


Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Richard Biener
On Tue, 7 May 2019, Jakub Jelinek wrote:

> On Tue, May 07, 2019 at 01:50:23PM +0200, Marc Glisse wrote:
> > > And actually it seems that we could optimize the plus1 == plus2 cases
> > > even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
> > > (FE_DOWNWARD) mode the testcase prints the first two (in all other modes 
> > > all
> > > 4).
> > 
> > It is very hard to judge what is ok with -frounding-math, because that mode
> > is already unusably broken (I use a pass-through asm volatile to protect the
> > arguments and result of every operation instead). One important aspect of
> > the optimization is whether both operations use the same rounding mode, or
> > if there may be a call to fesetround in between. Probably we shouldn't care
> > > about -frounding-math, since anyway it is likely that it will use some
> > IFN_FANCY_PLUS instead of PLUS_EXPR if it is ever implemented.
> 
> I haven't thought about
>  t = x + 0.0;
>  fesetround (...);
>  y = t + 0.0;
> indeed, let's take -frounding-math out of the patch now.  If we improve
> that mode, such as through explicit dependencies on the floating point state
> in the IL, we can get back to this case too.
> 
> > > + (inner_op @0 @1
> > 
> > Shouldn't you give it a name in the source pattern and return that, instead
> > of creating a new statement? Or are you doing the operation a second time on
> 
> Good idea.
> 
> > purpose in case the rounding mode changed or to force an exception?
> > 
> > > + (outer_op @0 @2)
> > 
> > With sNaN, this may raise a second exception where we used to have only
> > qNaN+0, no? And the handling of exceptions may have changed in between, etc.
> 
> IEEE 754 I believe says that for x non-zero x + (+/-0.0) = x and the only
> exception raised could be invalid exception if x is sNaN or the Intel
> denormal operand exception (I think we generally don't care about that one)
> and nothing else (there should be no overflow nor underflow nor inexact and
> obviously no division by zero).  If the invalid exception is masked off,
> then I believe one can't distinguish between the x + 0.0 and (x + 0.0) + 0.0
> computations, already x + 0.0 will raise IE and turn the sNaN into qNaN and
> the optional second + 0.0 will just keep that to be a qNaN without further
> exceptions, unless there is some library call in between which queries the
> accumulated exceptions, clears it etc.  I believe handling that case right
> is only possible if we make those dependencies in the IL explicit and under
> non-default flags.  In any case, I don't see a difference between the
> @3 case where we keep the inner op and the case where we keep the outer op
> but remove the inner op.  Both behave the same.
> 
> Here is an updated patch with your @3 idea and taking out -frounding-math
> stuff.

OK if there are no further comments.

Richard.

> 2019-05-07  Jakub Jelinek  
> 
>   PR tree-optimization/90356
>   * match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
> 
>   * gcc.dg/tree-ssa/pr90356-1.c: New test.
>   * gcc.dg/tree-ssa/pr90356-2.c: New test.
>   * gcc.dg/tree-ssa/pr90356-3.c: New test.
>   * gcc.dg/tree-ssa/pr90356-4.c: New test.
> 
> --- gcc/match.pd.jj   2019-05-07 13:56:53.062954181 +0200
> +++ gcc/match.pd  2019-05-07 14:30:36.010474285 +0200
> @@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
>   (if (fold_real_zero_addition_p (type, @1, 1))
>(non_lvalue @0)))
>  
> +/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
> +   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
> +   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
> +   if not -frounding-math.  For sNaNs the first operation would raise
> +   exceptions but turn the result into qNan, so the second operation
> +   would not raise it.   */
> +(for inner_op (plus minus)
> + (for outer_op (plus minus)
> +  (simplify
> +   (outer_op (inner_op@3 @0 REAL_CST@1) REAL_CST@2)
> +(if (real_zerop (@1)
> +  && real_zerop (@2)
> +  && !HONOR_SIGN_DEPENDENT_ROUNDING (type))
> + (with { bool inner_plus = ((inner_op == PLUS_EXPR)
> + ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
> +  bool outer_plus
> += ((outer_op == PLUS_EXPR)
> +   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
> +  (if (outer_plus && !inner_plus)
> +   (outer_op @0 @2)
> +   @3))
> +
>  /* Simplify x - x.
> This is unsafe for certain floats even in non-IEEE formats.
> In IEEE, it is unsafe because it does wrong for NaNs.
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj  2019-05-07 
> 14:27:17.912654939 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c 2019-05-07 14:27:17.912654939 
> +0200
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros 
> -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times 

Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Jakub Jelinek
On Tue, May 07, 2019 at 01:50:23PM +0200, Marc Glisse wrote:
> > And actually it seems that we could optimize the plus1 == plus2 cases
> > even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
> > (FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
> > 4).
> 
> It is very hard to judge what is ok with -frounding-math, because that mode
> is already unusably broken (I use a pass-through asm volatile to protect the
> arguments and result of every operation instead). One important aspect of
> the optimization is whether both operations use the same rounding mode, or
> if there may be a call to fesetround in between. Probably we shouldn't care
> about -frounding-math, since anyway it is likely that it will use some
> IFN_FANCY_PLUS instead of PLUS_EXPR if it is ever implemented.

I haven't thought about
 t = x + 0.0;
 fesetround (...);
 y = t + 0.0;
indeed, let's take -frounding-math out of the patch now.  If we improve
that mode, such as through explicit dependencies on the floating point state
in the IL, we can get back to this case too.

> > +   (inner_op @0 @1
> 
> Shouldn't you give it a name in the source pattern and return that, instead
> of creating a new statement? Or are you doing the operation a second time on

Good idea.

> purpose in case the rounding mode changed or to force an exception?
> 
> > +   (outer_op @0 @2)
> 
> With sNaN, this may raise a second exception where we used to have only
> qNaN+0, no? And the handling of exceptions may have changed in between, etc.

IEEE 754 I believe says that for x non-zero x + (+/-0.0) = x and the only
exception raised could be invalid exception if x is sNaN or the Intel
denormal operand exception (I think we generally don't care about that one)
and nothing else (there should be no overflow nor underflow nor inexact and
obviously no division by zero).  If the invalid exception is masked off,
then I believe one can't distinguish between the x + 0.0 and (x + 0.0) + 0.0
computations, already x + 0.0 will raise IE and turn the sNaN into qNaN and
the optional second + 0.0 will just keep that to be a qNaN without further
exceptions, unless there is some library call in between which queries the
accumulated exceptions, clears it etc.  I believe handling that case right
is only possible if we make those dependencies in the IL explicit and under
non-default flags.  In any case, I don't see a difference between the
@3 case where we keep the inner op and the case where we keep the outer op
but remove the inner op.  Both behave the same.

Here is an updated patch with your @3 idea and taking out -frounding-math
stuff.

2019-05-07  Jakub Jelinek  

PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

* gcc.dg/tree-ssa/pr90356-1.c: New test.
* gcc.dg/tree-ssa/pr90356-2.c: New test.
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.

--- gcc/match.pd.jj 2019-05-07 13:56:53.062954181 +0200
+++ gcc/match.pd2019-05-07 14:30:36.010474285 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math.  For sNaNs the first operation would raise
+   exceptions but turn the result into qNan, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op@3 @0 REAL_CST@1) REAL_CST@2)
+(if (real_zerop (@1)
+&& real_zerop (@2)
+&& !HONOR_SIGN_DEPENDENT_ROUNDING (type))
+ (with { bool inner_plus = ((inner_op == PLUS_EXPR)
+   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+bool outer_plus
+  = ((outer_op == PLUS_EXPR)
+ ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+  (if (outer_plus && !inner_plus)
+   (outer_op @0 @2)
+   @3))
+
 /* Simplify x - x.
This is unsafe for certain floats even in non-IEEE formats.
In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj2019-05-07 
14:27:17.912654939 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c   2019-05-07 14:27:17.912654939 
+0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros 
-fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 

Re: [PATCH 2/2] Support {MIN,MAX}_EXPR in GIMPLE FE.

2019-05-07 Thread Richard Biener
On Tue, May 7, 2019 at 2:01 PM Martin Liška  wrote:
>
> On 5/6/19 1:35 PM, Richard Biener wrote:
> > On Mon, May 6, 2019 at 10:00 AM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> The patch is about support of a new GIMPLE expr.
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Ready to be installed?
> >
> > Can you please avoid using/changing parser_build_binary_op?  The other
> > binary expression handling just does
> >
> >   if (lhs.value != error_mark_node && rhs.value != error_mark_node)
> > ret.value = build2_loc (ret_loc, code, ret_type, lhs.value, rhs.value);
> >
> > which should work equally well here.  I think for future expansion
> > splitting out the ( op, op ) parsing and expression building into
> > a function might be nicer so that c_parser_gimple_unary_expression
> > reads
> >
> > >   if (strcmp (IDENTIFIER_POINTER (id), "__MIN") == 0)
> > return c_parser_gimple_parentized_binary_expression (op_loc, MIN_EXPR);
> >   else if (...)
> >
> > OK with such change/factoring.
>
> I've done everything you pointed out.

OK.

Thanks,
Richard.

> Martin
>
> >
> > Thanks,
> > Richard.
> >
> >> Thanks,
> >> Martin
>


Re: [PATCH][stage1] Support profile (BB counts and edge probabilities) in GIMPLE FE.

2019-05-07 Thread Richard Biener
On Tue, May 7, 2019 at 2:00 PM Martin Liška  wrote:
>
> On 5/6/19 4:02 PM, Richard Biener wrote:
> > On Mon, May 6, 2019 at 9:59 AM Martin Liška  wrote:
> >>
> >> On 5/2/19 2:31 PM, Richard Biener wrote:
> >>> On Mon, Apr 29, 2019 at 2:51 PM Martin Liška  wrote:
> 
>  On 4/26/19 3:18 PM, Richard Biener wrote:
> > On Wed, Apr 10, 2019 at 10:12 AM Martin Liška  wrote:
> >>
> >> On 4/9/19 3:19 PM, Jan Hubicka wrote:
>  Hi.
> 
>  There's updated version that supports profile quality for both counts
>  and probabilities. I'm wondering whether ENTRY and EXIT BBs needs to
>  have set probability. Apparently, I haven't seen any verifier that
>  would complain.
> >>>
> >>> Well, you do not need to define it but then several cases will
> >>> degenerate. In particular BB frequencies (for callgraph profile or
> >>> hot/cold decisions) are calculated as ratios of entry BB and given BB
> >>> count. If entry BB is undefined you will get those undefined and
> >>> heuristics will resort to conservative answers.
> >>>
> >>> I do not think we use exit block count.
> >>>
> >>> Honza
> >>>
> >>
> >> Thank you Honza for explanation. I'm sending version of the patch
> >> that supports entry BB count.
> >>
> >> I've been testing the patch right now.
> >
> > Can you move the GIMPLE/RTL FE specific data in c_declspecs to
> > a substructure accessed via indirection?  I guess enlarging that
> > isn't really what we should do.  You'd move gimple_or_rtl_pass
> > there and make that pointer one to a struct aux_fe_data
> > (lifetime managed by the respective RTL/GIMPLE FE, thus
> > to be freed there)?  Joseph, do you agree or is adding more
> > stuff to c_declspecs OK (I would guess it could be a few more
> > elements in the future).
> 
>  Let's wait here for Joseph.
> >>>
> >>> So looks like it won't matter so let's go with the current approach
> >>> for the moment.
> >>>
> >
> > -c_parser_gimple_parse_bb_spec (tree val, int *index)
> > +c_parser_gimple_parse_bb_spec (tree val, gimple_parser ,
> > +  int *index, profile_probability 
> > *probablity)
> >  {
> >
> > so this will allow specifying probability in PHI node arguments.
> > I think we want to split this into the already existing part
> > and a c_parser_gimple_parse_bb_spec_with_edge_probability
> > to be used at edge sources.
> 
>  Yes, that's a good idea!
> 
> >
> > +  if (cfun->curr_properties & PROP_cfg)
> > +{
> > +  update_max_bb_count ();
> > +  set_hot_bb_threshold (hot_bb_threshold);
> > +  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb_count;
> >
> > I guess the last one should be before update_max_bb_count ()?
> 
>  Same here.
> 
> >
> > +}
> >
> > + /* Parse profile: quality(value) */
> >   else
> > {
> > - c_parser_error (parser, "unknown block 
> > specifier");
> > - return return_p;
> > + tree q;
> > + profile_quality quality;
> > + tree v = c_parser_peek_token (parser)->value;
> >
> > peek next token before the if/else and do
> >
> >else if (!strcmp (...))
> >
> > as in the loop_header case.  I expected we can somehow share
> > parsing of profile quality and BB/edge count/frequency?  How's
> > the expected syntax btw, comments in the code should tell us.
> > I'm guessing it's quality-id '(' number ')' and thus it should be
> > really shareable between edge and BB count and also __GIMPLE
> > header parsing?  So parse_profile_quality should be
> > parse_profile () instead, resulting in a combined value
> > (we do use the same for edge/bb?).
> 
>  It's problematic, there are different error messages for count/frequency.
>  Moreover call to parse_profile_quality in 
>  c_parser_gimple_or_rtl_pass_list
>  is a way how to test that next 'token' is a profile count.
> >>>
> >>> Who cares about error messages...  But sure, I'm just proposing to
> >>> merge testing for next token and actual parsing.
> >>
> >> After I've done removal of hot_bb_threshold parsing, there are just 2
> >> usages
> >> of parse_profile_quality. I would like to leave it as is, not introducing
> >> wrappers.
> >>
> >>>
> >
> > +  else if (!strcmp (op, "hot_bb_threshold"))
> > +   {
> >
> > I'm not sure about this - it doesn't make sense to specify this
> > on a per-function base since it seems to control a global
> > variable (booo!)?
> 
>  Yep, shame on me!
> 
> > Isn't this instead computed on-demand
> > based on profile_info->sum_max?
> 
>  

Re: [ada, build] Fix make install-gcc-specs with empty GCC_SPEC_FILES

2019-05-07 Thread Arnaud Charlet
> When installing gcc 9.1.0 on Solaris 10 with CONFIG_SHELL=/bin/ksh, it
> failed in the same way as originally fixed by
> 
>   https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00087.html
> 
> While the patch still is on the gcc-5 and gcc-6 branches, it has been
> lost (inadvertently, I assume) on trunk before gcc-7 branched by r244367.
> 
> I'd like to restore the fix, preferably on all of mainline and the
> gcc-9, gcc-8, and gcc-7 branches.
> 
> Tested with the gcc 9.1.0 release on i386-pc-solaris2.10 and
> sparc-sun-solaris2.10.  Ok?

OK


[libcpp] Reimplement mkdeps data structures

2019-05-07 Thread Nathan Sidwell
This patch reimplements the header dependency data structures.  We can 
now use a vector class, rather than cut-n-paste 3 sets of bespoke 
C-style array handling.  Sadly, simply using vec.h didn't work, so I do 
have one internal vector class.


The other change is that, rather than apply quoting on adding the 
dependencies, we apply it when writing them out.  This'll permit writing 
the dependencies in different forms (later).  Because we have both -MT 
already-quoted-target and -MQ apply-quote-target we need to remember 
which was used.  Fortunately, we record these targets first, so we can 
just record when we stopped adding already-quoted targets.


nathan
--
Nathan Sidwell
2019-05-07  Nathan Sidwell  

	* include/mkdeps.h (deps_write): Add PHONY arg.
	(deps_phony_targets): Delete.
	* init.c (cpp_finish): Just call deps_write.
	* mkdeps.c (struct mkdeps): Add local vector class.  Reimplement
	vector handling.
	(munge): Munge to static buffer.
	(apply_vpath): Adjust vector handling.
	(deps_init, deps_free): Use new, delete.
	(deps_add_target): Do not munge here.  Record quoting low water mark.
	(deps_add_dep): Do not munge here.
	(deps_add_vpath): Adjust vector handling.
	(make_write_name): New.  Munge on demand here.
	(make_write_vec): New.
	(deps_phony_targets): Delete.
	(make_write): New.
	(deps_write): Forward to make_write.
	(deps_save, deps_restore): Adjust vector handling.

Index: libcpp/include/mkdeps.h
===
--- libcpp/include/mkdeps.h	(revision 270940)
+++ libcpp/include/mkdeps.h	(working copy)
@@ -57,7 +57,7 @@ extern void deps_add_dep (struct mkdeps
 
 /* Write out a deps buffer to a specified file.  The third argument
is the number of columns to word-wrap at (0 means don't wrap).  */
-extern void deps_write (const struct mkdeps *, FILE *, unsigned int);
+extern void deps_write (const struct mkdeps *, FILE *, bool, unsigned int);
 
 /* Write out a deps buffer to a file, in a form that can be read back
with deps_restore.  Returns nonzero on error, in which case the
@@ -70,10 +70,4 @@ extern int deps_save (struct mkdeps *, F
in which case that filename is skipped.  */
 extern int deps_restore (struct mkdeps *, FILE *, const char *);
 
-/* For each dependency *except the first*, emit a dummy rule for that
-   file, causing it to depend on nothing.  This is used to work around
-   the intermediate-file deletion misfeature in Make, in some
-   automatic dependency schemes.  */
-extern void deps_phony_targets (const struct mkdeps *, FILE *);
-
 #endif /* ! LIBCPP_MKDEPS_H */
Index: libcpp/init.c
===
--- libcpp/init.c	(revision 270940)
+++ libcpp/init.c	(working copy)
@@ -764,14 +764,9 @@ cpp_finish (cpp_reader *pfile, FILE *dep
   while (pfile->buffer)
 _cpp_pop_buffer (pfile);
 
-  if (CPP_OPTION (pfile, deps.style) != DEPS_NONE
-  && deps_stream)
-{
-  deps_write (pfile->deps, deps_stream, 72);
-
-  if (CPP_OPTION (pfile, deps.phony_targets))
-	deps_phony_targets (pfile->deps, deps_stream);
-}
+  if (CPP_OPTION (pfile, deps.style) != DEPS_NONE && deps_stream)
+deps_write (pfile->deps, deps_stream,
+		CPP_OPTION (pfile, deps.phony_targets), 72);
 
   /* Report on headers that could use multiple include guards.  */
   if (CPP_OPTION (pfile, print_include_names))
Index: libcpp/mkdeps.c
===
--- libcpp/mkdeps.c	(revision 270940)
+++ libcpp/mkdeps.c	(working copy)
@@ -24,99 +24,157 @@ along with this program; see the file CO
 #include "system.h"
 #include "mkdeps.h"
 
+/* Not set up to just include std::vector et al, here's a simple
+   implementation.  */
+
 /* Keep this structure local to this file, so clients don't find it
easy to start making assumptions.  */
 struct mkdeps
 {
-  const char **targetv;
-  unsigned int ntargets;	/* number of slots actually occupied */
-  unsigned int targets_size;	/* amt of allocated space - in words */
-
-  const char **depv;
-  unsigned int ndeps;
-  unsigned int deps_size;
-
-  const char **vpathv;
-  size_t *vpathlv;
-  unsigned int nvpaths;
-  unsigned int vpaths_size;
-};
+public:
+  /* T has trivial cctor & dtor.  */
+  template 
+  class vec
+  {
+  private:
+T *ary;
+unsigned num;
+unsigned alloc;
+
+  public:
+vec ()
+  : ary (NULL), num (0), alloc (0)
+  {}
+~vec ()
+  {
+	XDELETEVEC (ary);
+  }
+
+  public:
+unsigned size () const
+{
+  return num;
+}
+const T [] (unsigned ix) const
+{
+  return ary[ix];
+}
+void push (const T )
+{
+  if (num == alloc)
+	{
+	  alloc = alloc ? alloc * 2 : 16;
+	  ary = XRESIZEVEC (T, ary, alloc);
+	}
+  ary[num++] = elt;
+}
+  };
+  struct velt
+  {
+const char *str;
+size_t len;
+  };
+
+  mkdeps ()
+: quote_lwm (0)
+  {
+  }
+  ~mkdeps ()
+  {
+unsigned int i;
+
+for (i = targets.size 

[ada, build] Fix make install-gcc-specs with empty GCC_SPEC_FILES

2019-05-07 Thread Rainer Orth
When installing gcc 9.1.0 on Solaris 10 with CONFIG_SHELL=/bin/ksh, it
failed in the same way as originally fixed by

https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00087.html

While the patch still is on the gcc-5 and gcc-6 branches, it has been
lost (inadvertently, I assume) on trunk before gcc-7 branched by r244367.

I'd like to restore the fix, preferably on all of mainline and the
gcc-9, gcc-8, and gcc-7 branches.

Tested with the gcc 9.1.0 release on i386-pc-solaris2.10 and
sparc-sun-solaris2.10.  Ok?

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2019-05-04  Rainer Orth  

* gcc-interface/Makefile.in (install-gcc-specs): Use foreach.
Honor DESTDIR.

# HG changeset patch
# Parent  cbbedd772b72266ea85b3f1548ddefe1f6248836
Fix make install-gcc-specs with empty GCC_SPEC_FILES

diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -505,9 +505,8 @@ gnatlink-re: ../stamp-tools gnatmake-re
 install-gcc-specs:
 #	Install all the requested GCC spec files.
 
-	for f in $(GCC_SPEC_FILES); do \
-	$(INSTALL_DATA_DATE) $(srcdir)/ada/$$f $(libsubdir)/; \
-	done
+	$(foreach f,$(GCC_SPEC_FILES), \
+	$(INSTALL_DATA_DATE) $(srcdir)/ada/$(f) $(DESTDIR)$(libsubdir)/;)
 
 install-gnatlib: ../stamp-gnatlib-$(RTSDIR) install-gcc-specs
 	$(RMDIR) $(DESTDIR)$(ADA_RTL_OBJ_DIR)


Re: [PATCH] Implement LWG 2686, hash

2019-05-07 Thread Christophe Lyon
On Tue, 7 May 2019 at 12:07, Jonathan Wakely  wrote:
>
> On 07/05/19 10:37 +0100, Jonathan Wakely wrote:
> >On 07/05/19 11:05 +0200, Christophe Lyon wrote:
> >>On Sat, 4 May 2019 at 16:36, Jonathan Wakely  wrote:
> >>>
> >>>On 03/05/19 23:42 +0100, Jonathan Wakely wrote:
> On 23/03/17 17:49 +, Jonathan Wakely wrote:
> >On 12/03/17 13:16 +0100, Daniel Krügler wrote:
> >>The following is an *untested* patch suggestion, please verify.
> >>
> >>Notes: My interpretation is that hash<error_condition> should be
> >>defined outside of the _GLIBCXX_COMPATIBILITY_CXX0X block, please
> >>double-check that course of action.
> >
> >That's right.
> >
> >>I noticed that the preexisting hash<error_code> did directly refer to
> >>the private members of error_code albeit those have public access
> >>functions. For consistency I mimicked that existing style when
> >>implementing hash<error_condition>.
> >
> >I see no reason for that, so I've removed the friend declaration and
> >used the public member functions.
> 
> I'm going to do the same for hash<error_code> too. It can also use the
> public members instead of being a friend.
> 
> 
> >Although this is a DR, I'm treating it as a new C++17 feature, so I've
> >adjusted the patch to only add the new specialization for C++17 mode.
> >We're too close to the GCC 7 release to be adding new things to the
> >default mode, even minor things like this. After GCC 7 is released we
> >can revisit it and decide if we want to enable it for all modes.
> 
> We never revisited that, and it's still only enabled for C++17 and up.
> I guess that's OK, but we could enabled it for C++11 and 14 on trunk
> if we want. Anybody care enough to argue for that?
> 
> >Here's what I've tested and will be committing.
> >
> >
> 
> >commit 90ca0fd91f5c65af370beb20af06bdca257aaf63
> >Author: Jonathan Wakely 
> >Date:   Thu Mar 23 11:47:39 2017 +
> >
> >   Implement LWG 2686, std::hash<error_condition>, for C++17
> >   2017-03-23  Daniel Kruegler  
> >  Implement LWG 2686, Why is std::hash specialized for error_code,
> >  but not error_condition?
> >  * include/std/system_error (hash): Define for 
> > C++17.
> >  * testsuite/20_util/hash/operators/size_t.cc 
> > (hash):
> >  Instantiate test for error_condition.
> >  * testsuite/20_util/hash/requirements/explicit_instantiation.cc
> >  (hash): Instantiate hash.
> >
> >diff --git a/libstdc++-v3/include/std/system_error 
> >b/libstdc++-v3/include/std/system_error
> >index 6775a6e..ec7d25f 100644
> >--- a/libstdc++-v3/include/std/system_error
> >+++ b/libstdc++-v3/include/std/system_error
> >@@ -373,14 +373,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >_GLIBCXX_END_NAMESPACE_VERSION
> >} // namespace
> >
> >-#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
> >-
> >#include 
> >
> >namespace std _GLIBCXX_VISIBILITY(default)
> >{
> >_GLIBCXX_BEGIN_NAMESPACE_VERSION
> >
> >+#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
> >  // DR 1182.
> >  /// std::hash specialization for error_code.
> >  template<>
> >@@ -394,12 +393,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
> >  }
> >};
> >+#endif // _GLIBCXX_COMPATIBILITY_CXX0X
> >+
> >+#if __cplusplus > 201402L
> >+  // DR 2686.
> >+  /// std::hash specialization for error_condition.
> >+  template<>
> >+struct hash
> >+: public __hash_base
> >+{
> >+  size_t
> >+  operator()(const error_condition& __e) const noexcept
> >+  {
> >+ const size_t __tmp = std::_Hash_impl::hash(__e.value());
> >+ return std::_Hash_impl::__hash_combine(__e.category(), __tmp);
> 
> When I changed this from using __e._M_cat (as in Daniel's patch) to
> __e.category() I introduced a bug, because the former is a pointer to
> the error_category (and error_category objects are unique and so can
> be identified by their address) and the latter is the object itself,
> so we hash the bytes of an abstract base class instead of hashing the
> pointer to it. Oops.
> 
> Patch coming up to fix that.
> >>>
> >>>Here's the fix. Tested powerpc64le-linux, committed to trunk.
> >>>
> >>>I'll backport this to 7, 8 and 9 as well.
> >>>
> >>
> >>Hi Jonathan,
> >>
> >>Does the new test lack dg-require-filesystem-ts ?
> >
> >It lacks it, because it doesn't use the filesystem library at all.
> >
> >>I'm seeing link failures on arm-eabi (using newlib):
> >>Excess errors:
> >>/libstdc++-v3/src/c++17/fs_ops.cc:806: undefined reference to `chdir'
> >>/libstdc++-v3/src/c++17/fs_ops.cc:583: undefined reference to `mkdir'
> >>/libstdc++-v3/src/c++17/fs_ops.cc:1134: undefined reference to `chmod'
> 

Re: [PATCH][stage1] Support profile (BB counts and edge probabilities) in GIMPLE FE.

2019-05-07 Thread Martin Liška
On 5/6/19 4:02 PM, Richard Biener wrote:
> On Mon, May 6, 2019 at 9:59 AM Martin Liška  wrote:
>>
>> On 5/2/19 2:31 PM, Richard Biener wrote:
>>> On Mon, Apr 29, 2019 at 2:51 PM Martin Liška  wrote:

 On 4/26/19 3:18 PM, Richard Biener wrote:
> On Wed, Apr 10, 2019 at 10:12 AM Martin Liška  wrote:
>>
>> On 4/9/19 3:19 PM, Jan Hubicka wrote:
 Hi.

 There's updated version that supports profile quality for both counts
 and probabilities. I'm wondering whether ENTRY and EXIT BBs needs to
 have set probability. Apparently, I haven't seen any verifier that
 would complain.
>>>
>>> Well, you do not need to define it but then several cases will
>>> degenerate. In particular BB frequencies (for callgraph profile or
>>> hot/cold decisions) are calculated as ratios of entry BB and given BB
>>> count. If entry BB is undefined you will get those undefined and
>>> heuristics will resort to conservative answers.
>>>
>>> I do not think we use exit block count.
>>>
>>> Honza
>>>
>>
>> Thank you Honza for explanation. I'm sending version of the patch
>> that supports entry BB count.
>>
>> I've been testing the patch right now.
>
> Can you move the GIMPLE/RTL FE specific data in c_declspecs to
> a substructure accessed via indirection?  I guess enlarging that
> isn't really what we should do.  You'd move gimple_or_rtl_pass
> there and make that pointer one to a struct aux_fe_data
> (lifetime managed by the respective RTL/GIMPLE FE, thus
> to be freed there)?  Joseph, do you agree or is adding more
> stuff to c_declspecs OK (I would guess it could be a few more
> elements in the future).

 Let's wait here for Joseph.
>>>
>>> So looks like it won't matter so let's go with the current approach
>>> for the moment.
>>>
>
> -c_parser_gimple_parse_bb_spec (tree val, int *index)
> +c_parser_gimple_parse_bb_spec (tree val, gimple_parser ,
> +  int *index, profile_probability 
> *probablity)
>  {
>
> so this will allow specifying probability in PHI node arguments.
> I think we want to split this into the already existing part
> and a c_parser_gimple_parse_bb_spec_with_edge_probability
> to be used at edge sources.

 Yes, that's a good idea!

>
> +  if (cfun->curr_properties & PROP_cfg)
> +{
> +  update_max_bb_count ();
> +  set_hot_bb_threshold (hot_bb_threshold);
> +  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = entry_bb_count;
>
> I guess the last one should be before update_max_bb_count ()?

 Same here.

>
> +}
>
> + /* Parse profile: quality(value) */
>   else
> {
> - c_parser_error (parser, "unknown block specifier");
> - return return_p;
> + tree q;
> + profile_quality quality;
> + tree v = c_parser_peek_token (parser)->value;
>
> peek next token before the if/else and do
>
>else if (!strcmp (...))
>
> as in the loop_header case.  I expected we can somehow share
> parsing of profile quality and BB/edge count/frequency?  How's
> the expected syntax btw, comments in the code should tell us.
> I'm guessing it's quality-id '(' number ')' and thus it should be
> really shareable between edge and BB count and also __GIMPLE
> header parsing?  So parse_profile_quality should be
> parse_profile () instead, resulting in a combined value
> (we do use the same for edge/bb?).

 It's problematic, there are different error messages for count/frequency.
 Moreover call to parse_profile_quality in c_parser_gimple_or_rtl_pass_list
 is a way how to test that next 'token' is a profile count.
>>>
>>> Who cares about error messages...  But sure, I'm just proposing to
>>> merge testing for next token and actual parsing.
>>
>> After I've done removal of hot_bb_threshold parsing, there are just 2 usages
>> of parse_profile_quality. I would like to leave it as is, not introducing
>> wrappers.
>>
>>>
>
> +  else if (!strcmp (op, "hot_bb_threshold"))
> +   {
>
> I'm not sure about this - it doesn't make sense to specify this
> on a per-function base since it seems to control a global
> variable (booo!)?

 Yep, shame on me!

> Isn't this instead computed on-demand
> based on profile_info->sum_max?

 No it's a global value shared among functions.

> If not then I think
> we need an alternate way of funneling in global state into
> the GIMPLE FE.

 What about --param gimple-fe-hot-bb-threshold ?
>>>
>>> I thought about that, yes ...  in absence can it actually be
>>> "computed"?
>>
>> Renamed 

Re: [PATCH 2/2] Support {MIN,MAX}_EXPR in GIMPLE FE.

2019-05-07 Thread Martin Liška
On 5/6/19 1:35 PM, Richard Biener wrote:
> On Mon, May 6, 2019 at 10:00 AM Martin Liška  wrote:
>>
>> Hi.
>>
>> The patch is about support of a new GIMPLE expr.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
> 
> Can you please avoid using/changing parser_build_binary_op?  The other
> binary expression handling just does
> 
>   if (lhs.value != error_mark_node && rhs.value != error_mark_node)
> ret.value = build2_loc (ret_loc, code, ret_type, lhs.value, rhs.value);
> 
> which should work equally well here.  I think for future expansion
> splitting out the ( op, op ) parsing and expression building into
> a function might be nicer so that c_parser_gimple_unary_expression
> reads
> 
>   if (strcmp (IDENTIFIER_POINTER (id), "__MIN") == 0)
> return c_parser_gimple_parentized_binary_expression (op_loc, MIN_EXPR);
>   else if (...)
> 
> OK with such change/factoring.

I've done everything you pointed out.

Martin

> 
> Thanks,
> Richard.
> 
>> Thanks,
>> Martin

>From fe7fc3a153e404c485fa1d8dcd428c4a8ebc8f67 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 3 May 2019 13:54:40 +0200
Subject: [PATCH 2/2] Support {MIN,MAX}_EXPR in GIMPLE FE.

gcc/ChangeLog:

2019-05-03  Martin Liska  

	* gimple-pretty-print.c (dump_binary_rhs): Dump MIN_EXPR
	and MAX_EXPR in GIMPLE FE format.

gcc/c/ChangeLog:

2019-05-03  Martin Liska  

	* gimple-parser.c (c_parser_gimple_statement): Support __MIN and
	__MAX.
	(c_parser_gimple_unary_expression): Parse also binary expression
	__MIN and __MAX.
	(c_parser_gimple_parentized_binary_expression): New function.

gcc/testsuite/ChangeLog:

2019-05-03  Martin Liska  

	* gcc.dg/gimplefe-39.c: New test.
---
 gcc/c/gimple-parser.c  | 38 +-
 gcc/gimple-pretty-print.c  | 15 +++-
 gcc/testsuite/gcc.dg/gimplefe-39.c | 21 +
 3 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/gimplefe-39.c

diff --git a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c
index ede5a927c3d..99f764710b2 100644
--- a/gcc/c/gimple-parser.c
+++ b/gcc/c/gimple-parser.c
@@ -750,7 +750,9 @@ c_parser_gimple_statement (gimple_parser , gimple_seq *seq)
   {
 	tree id = c_parser_peek_token (parser)->value;
 	if (strcmp (IDENTIFIER_POINTER (id), "__ABS") == 0
-	|| strcmp (IDENTIFIER_POINTER (id), "__ABSU") == 0)
+	|| strcmp (IDENTIFIER_POINTER (id), "__ABSU") == 0
+	|| strcmp (IDENTIFIER_POINTER (id), "__MIN") == 0
+	|| strcmp (IDENTIFIER_POINTER (id), "__MAX") == 0)
 	  goto build_unary_expr;
 	break;
   }
@@ -989,6 +991,32 @@ c_parser_gimple_binary_expression (gimple_parser )
   return ret;
 }
 
+/* Parse a gimple parentized binary expression.  */
+
+static c_expr
+c_parser_gimple_parentized_binary_expression (gimple_parser ,
+	  location_t op_loc,
+	  tree_code code)
+{
+  struct c_expr ret;
+  ret.set_error ();
+
+  c_parser_consume_token (parser);
+  if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
+return ret;
+  c_expr op1 = c_parser_gimple_postfix_expression (parser);
+  if (!c_parser_require (parser, CPP_COMMA, "expected %<,%>"))
+return ret;
+  c_expr op2 = c_parser_gimple_postfix_expression (parser);
+  if (!c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>"))
+return ret;
+
+  if (op1.value != error_mark_node && op2.value != error_mark_node)
+ret.value = build2_loc (op_loc,
+			code, TREE_TYPE (op1.value), op1.value, op2.value);
+  return ret;
+}
+
 /* Parse gimple unary expression.
 
gimple-unary-expression:
@@ -1078,6 +1106,14 @@ c_parser_gimple_unary_expression (gimple_parser )
 	  op = c_parser_gimple_postfix_expression (parser);
 	  return parser_build_unary_op (op_loc, ABSU_EXPR, op);
 	}
+	  else if (strcmp (IDENTIFIER_POINTER (id), "__MIN") == 0)
+	return c_parser_gimple_parentized_binary_expression (parser,
+ op_loc,
+ MIN_EXPR);
+	  else if (strcmp (IDENTIFIER_POINTER (id), "__MAX") == 0)
+	return c_parser_gimple_parentized_binary_expression (parser,
+ op_loc,
+ MAX_EXPR);
 	  else
 	return c_parser_gimple_postfix_expression (parser);
 	}
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 7e3916bff86..58212c4dcc1 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -423,9 +423,22 @@ dump_binary_rhs (pretty_printer *buffer, gassign *gs, int spc,
   enum tree_code code = gimple_assign_rhs_code (gs);
   switch (code)
 {
-case COMPLEX_EXPR:
 case MIN_EXPR:
 case MAX_EXPR:
+  if (flags & TDF_GIMPLE)
+	{
+	  pp_string (buffer, code == MIN_EXPR ? "__MIN (" : "__MAX (");
+	  dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags,
+			 false);
+	  pp_string (buffer, ", ");
+	  dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags,
+			 false);
+	  pp_string (buffer, ")");
+	  break;
+	}
+  else
+	

Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Marc Glisse

On Tue, 7 May 2019, Jakub Jelinek wrote:


On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote:

On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:

Will leave the "correctness check" for other folks
but the above is


BTW, as I wanted to be sure about the correctness, I wrote a simple program
(below).


Good idea :-)


And actually it seems that we could optimize the plus1 == plus2 cases
even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetenv
(FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
4).


It is very hard to judge what is ok with -frounding-math, because that 
mode is already unusably broken (I use a pass-through asm volatile to 
protect the arguments and result of every operation instead). One 
important aspect of the optimization is whether both operations use the 
same rounding mode, or if there may be a call to fesetround in between. 
Probably we shouldn't care about -frounding-math, since anyway it is 
likely that it will use some IFN_FANCY_PLUS instead of PLUS_EXPR if it is 
ever implemented.



+   (inner_op @0 @1


Shouldn't you give it a name in the source pattern and return that, 
instead of creating a new statement? Or are you doing the operation a 
second time on purpose in case the rounding mode changed or to force an 
exception?



+   (outer_op @0 @2)


With sNaN, this may raise a second exception where we used to have only 
qNaN+0, no? And the handling of exceptions may have changed in between, 
etc. Yes, -ftrapping-math is just as broken as -frounding-math.


--
Marc Glisse


[PATCH] Remove SLP tree build size limit

2019-05-07 Thread Richard Biener


First merge from the vectorize-with-SLP branch.

The limiting is ineffective for catching permute-caused growth because
this issue exists across instances as well.  The original exponential
growth has been fixed by making the SLP tree a graph.  The permute-caused
growth is limited to O(n^2).

This keeps the actual accounting and dumps the result.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

* tree-vect-slp.c (vect_build_slp_tree_2): Bump size whenever
we build a SLP node.  Remove max_size and limiting.
(vect_analyze_slp_instance): Record and dump size of the SLP graph.

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 40db1a208f1..2a1e5b83e53 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1034,7 +1034,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
   vec stmts, unsigned int group_size,
   poly_uint64 *max_nunits,
   bool *matches, unsigned *npermutes, unsigned *tree_size,
-  unsigned max_tree_size,
   scalar_stmts_to_slp_tree_map_t *bst_map);
 
 static slp_tree
@@ -1042,7 +1041,6 @@ vect_build_slp_tree (vec_info *vinfo,
 vec stmts, unsigned int group_size,
 poly_uint64 *max_nunits,
 bool *matches, unsigned *npermutes, unsigned *tree_size,
-unsigned max_tree_size,
 scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   if (slp_tree *leader = bst_map->get (stmts))
@@ -1055,8 +1053,7 @@ vect_build_slp_tree (vec_info *vinfo,
   return *leader;
 }
   slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
-   matches, npermutes, tree_size,
-   max_tree_size, bst_map);
+   matches, npermutes, tree_size, bst_map);
   /* Keep a reference for the bst_map use.  */
   if (res)
 res->refcnt++;
@@ -1076,7 +1073,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
   vec stmts, unsigned int group_size,
   poly_uint64 *max_nunits,
   bool *matches, unsigned *npermutes, unsigned *tree_size,
-  unsigned max_tree_size,
   scalar_stmts_to_slp_tree_map_t *bst_map)
 {
   unsigned nops, i, this_tree_size = 0;
@@ -1135,6 +1131,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
}
   else
return NULL;
+  (*tree_size)++;
   node = vect_create_new_slp_node (stmts);
   return node;
 }
@@ -1151,6 +1148,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
   && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
 {
   *max_nunits = this_max_nunits;
+  (*tree_size)++;
   node = vect_create_new_slp_node (stmts);
   return node;
 }
@@ -1178,9 +1176,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 
   stmt_info = stmts[0];
 
-  if (tree_size)
-max_tree_size -= *tree_size;
-
   /* Create SLP_TREE nodes for the definition node/s.  */
   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
 {
@@ -1193,23 +1188,10 @@ vect_build_slp_tree_2 (vec_info *vinfo,
  && oprnd_info->first_dt != vect_induction_def)
 continue;
 
-  if (++this_tree_size > max_tree_size)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-vect_location,
-"Build SLP failed: SLP tree too large\n");
- FOR_EACH_VEC_ELT (children, j, child)
-   vect_free_slp_tree (child, false);
- vect_free_oprnd_info (oprnds_info);
- return NULL;
-   }
-
   if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
group_size, _max_nunits,
matches, npermutes,
-   _tree_size,
-   max_tree_size, bst_map)) != NULL)
+   _tree_size, bst_map)) != NULL)
{
  /* If we have all children of child built up from scalars then just
 throw that away and build it up this node from scalars.  */
@@ -1238,6 +1220,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
 "scalars instead\n");
  oprnd_info->def_stmts = vNULL;
  SLP_TREE_DEF_TYPE (child) = vect_external_def;
+ ++this_tree_size;
  children.safe_push (child);
  continue;
}
@@ -1266,6 +1249,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
 "Building vector operands from scalars\n");
+ this_tree_size++;
  child = vect_create_new_slp_node (oprnd_info->def_stmts);
  SLP_TREE_DEF_TYPE 

Re: [libphobos, build] Enable libphobos on Solaris 11/x86

2019-05-07 Thread Rainer Orth
Hi Iain,

>> > I've just given building gcc a try in an OpenIndiana VM, and get the
>> > following:
>> >
>> > ld: fatal: option -z has illegal argument 'relax=transtls'
>> > ld: fatal: flags processing errors
>> > collect2: error: ld returned 1 exit status
>> >
>> > $ ld --version
>> > ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1756
>> > (illumos)
>> >
>> > My fatal mistake of course was not configuring with
>> > --with-ld=/usr/gnu/bin/ld, however it is notable that not all Solaris
>> > linkers support this relax=transtls option.
>>
>> I finally got back to this and did some experiments of my own: even the
>> latest Illumos ld doesn't implement -z relax=transtls, neither has it
>> fixed the underlying bug, so it's useless for 64-bit Illumos/x86.
>>
>> The following patch checks for those conditions (ld support for -z
>> relax=transtls or gld in use) and only enables libphobos if they are
>> met.
>>
>> I had to move the whole enable_libphobos/LIBPHOBOS_SUPPORTED block down
>> in configure.ac so it's able to use the results of the augmented
>> DRUNTIME_OS_LINK_SPEC.
>>
>> While I didn't test the patch on Illumos (gcc builds inside a VM are
>> slow), I tried it on Solaris 10/x86 with gas/ld and gas/gld (and an
>> improved version of the patch for PR d/88238) where it behaved as
>> expected.  I've also included a previous version in a Solaris 11/x86
>> bootstrap.
>>
>
> OK, thanks for having a look into it.

installed on mainline now.  I guess this should be backported to gcc-9
branch after some soak time?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [v3 PATCH] Make stateful allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-07 Thread Jonathan Wakely

On 07/05/19 12:22 +0100, Jonathan Wakely wrote:

On 07/05/19 12:01 +0100, Nina Dinka Ranns wrote:

Ack. I've put the use of _Alloc_traits::is_always_equal within #if
__cplusplus >= 201703L block since it is officially a C++17 feature.
Let me know if you think that's an overkill.


Yes, that's overkill, we provide is_always_equal unconditionally from
C++11 onwards (to avoid ODR violations in code using different -std
options). Since it's defined for C++11 we can use it for C++11.


For the record, we treat is_always_equal as a DR, resolving LWG 2108
(which I submitted, because the runtime conditionals caused by not
having it were making me cry).


I can remove that #if and test and commit the result for you though,
no need for another revision of the patch.


New changelog below. I didn't change the description of
operator+(basic_string&&,basic_string&&) as it's still technically
always resulting in an allocator from the first parameter.


Yes, that looks fine. Thanks!



Re: [v3 PATCH] Make stateful allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-07 Thread Nina Dinka Ranns
On Tue, 7 May 2019 at 12:22, Jonathan Wakely  wrote:
>
> I can remove that #if and test and commit the result for you though,
> no need for another revision of the patch.
Thanks ! :)
Best,
Nina


Re: [v3 PATCH] Make stateful allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-07 Thread Jonathan Wakely

On 07/05/19 12:01 +0100, Nina Dinka Ranns wrote:

Ack. I've put the use of _Alloc_traits::is_always_equal within #if
__cplusplus >= 201703L block since it is officially a C++17 feature.
Let me know if you think that's an overkill.


Yes, that's overkill, we provide is_always_equal unconditionally from
C++11 onwards (to avoid ODR violations in code using different -std
options). Since it's defined for C++11 we can use it for C++11.

I can remove that #if and test and commit the result for you though,
no need for another revision of the patch.


New changelog below. I didn't change the description of
operator+(basic_string&&,basic_string&&) as it's still technically
always resulting in an allocator from the first parameter.


Yes, that looks fine. Thanks!



[PATCH] Another PR90316 improvement

2019-05-07 Thread Richard Biener


Well, not actually measurable but we can save get_continuation_for_phi
calls from translate_vuse_through_block if we do not end up using
the result.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-05-07  Richard Biener  

PR tree-optimization/90316
* tree-ssa-pre.c (translate_vuse_through_block): When
same_valid is NULL do not bother to search for a virtual
PHI continuation.
(phi_translate_1): When operands changed we cannot keep
the same value-number so do not bother to ask whether
that's possible from translate_vuse_through_block.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 270940)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -1146,7 +1146,8 @@ translate_vuse_through_block (vecdest_idx);
-   }
+  /* If we didn't find any, the value ID can't stay the same.  */
+  if (!vuse && same_valid)
+   *same_valid = false;
   /* ??? We would like to return vuse here as this is the canonical
  upmost vdef that this reference is associated with.  But during
 insertion of the references into the hash tables we only ever
@@ -1535,7 +1532,8 @@ phi_translate_1 (bitmap_set_t dest,
? newoperands : operands,
ref->set, ref->type,
vuse, phiblock, pred,
-   _valid);
+   changed
+   ? NULL : _valid);
if (newvuse == NULL_TREE)
  {
newoperands.release ();



[PATCH] Properly limit PRE alias walking (PR90316)

2019-05-07 Thread Richard Biener


This is an attempt to improve compile-time for PR90316 further
where PRE (in particular PHI translation) takes too much time
by walking aliases.  Currently translate_vuse_through_block
walking isn't in any way limited, the following patch makes
it so and also limits get_continuation_for_phi walking in
general by refactoring how the limiting works.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

This makes lowering --param sccvn-max-alias-queries-per-access
effective for the testcase which it wasn't before.

Richard.

2019-05-07  Richard Biener  

PR tree-optimization/90316
* tree-ssa-alias.h (get_continuation_for_phi): Take walking
limit by reference.
(walk_non_aliased_vuses): Take walking limit argument.
* tree-ssa-alias.c (maybe_skip_until): Take limit and abort
walking if it is reached instead of just counting.
(get_continuation_for_phi): Likewise.
(walk_non_aliased_vuses): Likewise, instead of leaving counter
limiting to the callback.
* tree-ssa-sccvn.c (vn_reference_lookup_2): Adjust.
(vn_reference_lookup_3): Likewise.
(vn_reference_lookup_pieces): Likewise.
(vn_reference_lookup): Likewise.
* tree-ssa-pre.c (translate_vuse_through_block): Limit walking.
* tree-ssa-scopedtables.c (vuse_eq): Adjust.
(avail_exprs_stack::lookup_avail_expr): Likewise.

Index: gcc/tree-ssa-alias.h
===
--- gcc/tree-ssa-alias.h(revision 270934)
+++ gcc/tree-ssa-alias.h(working copy)
@@ -132,15 +132,13 @@ extern bool call_may_clobber_ref_p_1 (gc
 extern bool stmt_kills_ref_p (gimple *, tree);
 extern bool stmt_kills_ref_p (gimple *, ao_ref *);
 extern tree get_continuation_for_phi (gimple *, ao_ref *,
- unsigned int *, bitmap *, bool,
+ unsigned int &, bitmap *, bool,
  void *(*)(ao_ref *, tree, void *, bool *),
  void *);
 extern void *walk_non_aliased_vuses (ao_ref *, tree,
-void *(*)(ao_ref *, tree,
-  unsigned int, void *),
+void *(*)(ao_ref *, tree, void *),
 void *(*)(ao_ref *, tree, void *, bool *),
-tree (*)(tree),
-void *);
+tree (*)(tree), unsigned &, void *);
 extern int walk_aliased_vdefs (ao_ref *, tree,
   bool (*)(ao_ref *, tree, void *),
   void *, bitmap *,
Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c(revision 270934)
+++ gcc/tree-ssa-alias.c(working copy)
@@ -2599,7 +2599,7 @@ stmt_kills_ref_p (gimple *stmt, tree ref
 
 static bool
 maybe_skip_until (gimple *phi, tree , basic_block target_bb,
- ao_ref *ref, tree vuse, unsigned int *cnt, bitmap *visited,
+ ao_ref *ref, tree vuse, unsigned int , bitmap *visited,
  bool abort_on_visited,
  void *(*translate)(ao_ref *, tree, void *, bool *),
  void *data)
@@ -2634,7 +2634,7 @@ maybe_skip_until (gimple *phi, tree 
  /* An already visited PHI node ends the walk successfully.  */
  if (bitmap_bit_p (*visited, SSA_NAME_VERSION (PHI_RESULT (def_stmt
return !abort_on_visited;
- vuse = get_continuation_for_phi (def_stmt, ref, cnt,
+ vuse = get_continuation_for_phi (def_stmt, ref, limit,
   visited, abort_on_visited,
   translate, data);
  if (!vuse)
@@ -2646,7 +2646,9 @@ maybe_skip_until (gimple *phi, tree 
   else
{
  /* A clobbering statement or the end of the IL ends it failing.  */
- ++*cnt;
+ if ((int)limit <= 0)
+   return false;
+ --limit;
  if (stmt_may_clobber_ref_p_1 (def_stmt, ref))
{
  bool disambiguate_only = true;
@@ -2674,12 +2676,13 @@ maybe_skip_until (gimple *phi, tree 
 /* Starting from a PHI node for the virtual operand of the memory reference
REF find a continuation virtual operand that allows to continue walking
statements dominating PHI skipping only statements that cannot possibly
-   clobber REF.  Increments *CNT for each alias disambiguation done.
+   clobber REF.  Decrements LIMIT for each alias disambiguation done
+   and aborts the walk, returning NULL_TREE if it reaches zero.
Returns NULL_TREE if no suitable virtual operand can be found.  */
 
 tree
 get_continuation_for_phi (gimple *phi, ao_ref *ref,
- unsigned int *cnt, bitmap 

Re: [v3 PATCH] Make stateful allocator propagation more consistent for operator+(basic_string) (P1165R1)

2019-05-07 Thread Nina Dinka Ranns
Ack. I've put the use of _Alloc_traits::is_always_equal within #if
__cplusplus >= 201703L block since it is officially a C++17 feature.
Let me know if you think that's an overkill.
New changelog below. I didn't change the description of
operator+(basic_string&&,basic_string&&) as it's still technically
always resulting in an allocator from the first parameter.


2019-05-01  Nina Dinka Ranns  
Make stateful allocator propagation more consistent
for operator+(basic_string) (P1165R1)
* include/bits/basic_string.h:
(operator+(basic_string&&,basic_string&&) : Changed resulting
allocator to always be the one from the first parameter
* include/bits/basic_string.tcc:
(operator+(const _CharT*, const basic_string&)) : Changed
resulting allocator to be SOCCC on the second parameter's allocator
(operator+(_CharT, const basic_string&)) : Likewise
* testsuite/21_strings/basic_string/allocator/char/operator_plus.cc: New
* testsuite/21_strings/basic_string/allocator/wchar_t/operator_plus.cc:
New
Index: libstdc++-v3/include/bits/basic_string.h
===
--- libstdc++-v3/include/bits/basic_string.h	(revision 270655)
+++ libstdc++-v3/include/bits/basic_string.h	(working copy)
@@ -6097,11 +6097,22 @@
 operator+(basic_string<_CharT, _Traits, _Alloc>&& __lhs,
 	  basic_string<_CharT, _Traits, _Alloc>&& __rhs)
 {
-  const auto __size = __lhs.size() + __rhs.size();
-  const bool __cond = (__size > __lhs.capacity()
-			   && __size <= __rhs.capacity());
-  return __cond ? std::move(__rhs.insert(0, __lhs))
-	: std::move(__lhs.append(__rhs));
+	  using _Alloc_traits = allocator_traits<_Alloc>;
+	  bool __use_rhs = false;
+#if __cplusplus >= 201703L
+  if _GLIBCXX17_CONSTEXPR (typename _Alloc_traits::is_always_equal{})
+	__use_rhs = true;
+	  else
+#endif
+  if (__lhs.get_allocator() == __rhs.get_allocator())
+	__use_rhs = true;
+	  if (__use_rhs)
+	  {
+		const auto __size = __lhs.size() + __rhs.size();
+		if (__size > __lhs.capacity() && __size <= __rhs.capacity())
+		  return std::move(__rhs.insert(0, __lhs));
+	  }
+	  return std::move(__lhs.append(__rhs));
 }
 
   template
Index: libstdc++-v3/include/bits/basic_string.tcc
===
--- libstdc++-v3/include/bits/basic_string.tcc	(revision 270655)
+++ libstdc++-v3/include/bits/basic_string.tcc	(working copy)
@@ -1161,8 +1161,12 @@
   __glibcxx_requires_string(__lhs);
   typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
   typedef typename __string_type::size_type	  __size_type;
+  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
+	rebind<_CharT>::other _Char_alloc_type;
+  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
   const __size_type __len = _Traits::length(__lhs);
-  __string_type __str;
+  __string_type __str(_Alloc_traits::_S_select_on_copy(
+  __rhs.get_allocator()));
   __str.reserve(__len + __rhs.size());
   __str.append(__lhs, __len);
   __str.append(__rhs);
@@ -1175,7 +1179,11 @@
 {
   typedef basic_string<_CharT, _Traits, _Alloc> __string_type;
   typedef typename __string_type::size_type	  __size_type;
-  __string_type __str;
+  typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
+	rebind<_CharT>::other _Char_alloc_type;
+  typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits;
+  __string_type __str(_Alloc_traits::_S_select_on_copy(
+  __rhs.get_allocator()));
   const __size_type __len = __rhs.size();
   __str.reserve(__len + 1);
   __str.append(__size_type(1), __lhs);
Index: libstdc++-v3/testsuite/21_strings/basic_string/allocator/char/operator_plus.cc
===
--- libstdc++-v3/testsuite/21_strings/basic_string/allocator/char/operator_plus.cc	(nonexistent)
+++ libstdc++-v3/testsuite/21_strings/basic_string/allocator/char/operator_plus.cc	(working copy)
@@ -0,0 +1,151 @@
+// 2019-04-30  Nina Dinka Ranns  
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+ 
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+ 
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do run { target c++11 } }
+// COW 

Re: [PATCH] Implement LWG 2686, hash

2019-05-07 Thread Jonathan Wakely

On 07/05/19 10:37 +0100, Jonathan Wakely wrote:

On 07/05/19 11:05 +0200, Christophe Lyon wrote:

On Sat, 4 May 2019 at 16:36, Jonathan Wakely  wrote:


On 03/05/19 23:42 +0100, Jonathan Wakely wrote:

On 23/03/17 17:49 +0000, Jonathan Wakely wrote:

On 12/03/17 13:16 +0100, Daniel Krügler wrote:

The following is an *untested* patch suggestion, please verify.

Notes: My interpretation is that hash<error_condition> should be
defined outside of the _GLIBCXX_COMPATIBILITY_CXX0X block, please
double-check that course of action.


That's right.


I noticed that the preexisting hash<error_code> did directly refer to
the private members of error_code albeit those have public access
functions. For consistency I mimicked that existing style when
implementing hash<error_condition>.


I see no reason for that, so I've removed the friend declaration and
used the public member functions.


I'm going to do the same for hash<error_code> too. It can also use the
public members instead of being a friend.



Although this is a DR, I'm treating it as a new C++17 feature, so I've
adjusted the patch to only add the new specialization for C++17 mode.
We're too close to the GCC 7 release to be adding new things to the
default mode, even minor things like this. After GCC 7 is released we
can revisit it and decide if we want to enable it for all modes.


We never revisited that, and it's still only enabled for C++17 and up.
I guess that's OK, but we could enable it for C++11 and 14 on trunk
if we want. Anybody care enough to argue for that?


Here's what I've tested and will be committing.





commit 90ca0fd91f5c65af370beb20af06bdca257aaf63
Author: Jonathan Wakely 
Date:   Thu Mar 23 11:47:39 2017 +

  Implement LWG 2686, std::hash, for C++17
  2017-03-23  Daniel Kruegler  
 Implement LWG 2686, Why is std::hash specialized for error_code,
 but not error_condition?
 * include/std/system_error (hash): Define for C++17.
 * testsuite/20_util/hash/operators/size_t.cc (hash):
 Instantiate test for error_condition.
 * testsuite/20_util/hash/requirements/explicit_instantiation.cc
 (hash): Instantiate hash.

diff --git a/libstdc++-v3/include/std/system_error 
b/libstdc++-v3/include/std/system_error
index 6775a6e..ec7d25f 100644
--- a/libstdc++-v3/include/std/system_error
+++ b/libstdc++-v3/include/std/system_error
@@ -373,14 +373,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace

-#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
-
#include 

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

+#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
 // DR 1182.
 /// std::hash specialization for error_code.
 template<>
@@ -394,12 +393,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
 }
   };
+#endif // _GLIBCXX_COMPATIBILITY_CXX0X
+
+#if __cplusplus > 201402L
+  // DR 2686.
+  /// std::hash specialization for error_condition.
+  template<>
+struct hash
+: public __hash_base
+{
+  size_t
+  operator()(const error_condition& __e) const noexcept
+  {
+ const size_t __tmp = std::_Hash_impl::hash(__e.value());
+ return std::_Hash_impl::__hash_combine(__e.category(), __tmp);


When I changed this from using __e._M_cat (as in Daniel's patch) to
__e.category() I introduced a bug, because the former is a pointer to
the error_category (and error_category objects are unique and so can
be identified by their address) and the latter is the object itself,
so we hash the bytes of an abstract base class instead of hashing the
pointer to it. Oops.

Patch coming up to fix that.


Here's the fix. Tested powerpc64le-linux, committed to trunk.

I'll backport this to 7, 8 and 9 as well.



Hi Jonathan,

Does the new test lack dg-require-filesystem-ts ?


It lacks it, because it doesn't use the filesystem library at all.


I'm seeing link failures on arm-eabi (using newlib):
Excess errors:
/libstdc++-v3/src/c++17/fs_ops.cc:806: undefined reference to `chdir'
/libstdc++-v3/src/c++17/fs_ops.cc:583: undefined reference to `mkdir'
/libstdc++-v3/src/c++17/fs_ops.cc:1134: undefined reference to `chmod'
/libstdc++-v3/src/c++17/../filesystem/ops-common.h:439: undefined
reference to `chmod'
/libstdc++-v3/src/c++17/fs_ops.cc:750: undefined reference to `pathconf'
/libstdc++-v3/src/c++17/fs_ops.cc:769: undefined reference to `getcwd'

Christophe


Is it definitely the new 19_diagnostics/error_condition/hash.cc test
that's giving this error?

I adjusted the pre-existing 27_io/filesystem/operations/absolute.cc
test in r270874, which seems a more likely culprit, but that already
has dg-require-filesystem-ts.




Re: aliasing_component_refs_p tweek

2019-05-07 Thread Jan Hubicka
> 
> No need for the else {} and thus indenting the rest since the if ()
> arm always returns from the function.
> 
> OK with eliding this else { } wrapping.

Ah, right, I changed the function a bit too many times :)
Here is the updated patch, which I re-tested and committed.

* tree-ssa-alias.c (aliasing_component_refs_p): Continue looking
for comparable types in the second direction even if first one
hits incomparable type.
Index: tree-ssa-alias.c
===
--- tree-ssa-alias.c(revision 270877)
+++ tree-ssa-alias.c(working copy)
@@ -795,7 +795,7 @@ aliasing_component_refs_p (tree ref1,
   tree base1, base2;
   tree type1, type2;
   tree *refp;
-  int same_p;
+  int same_p, same_p2;
 
   /* Choose bases and base types to search for.  */
   base1 = ref1;
@@ -814,10 +814,7 @@ aliasing_component_refs_p (tree ref1,
 && same_type_for_tbaa (TREE_TYPE (*refp), type1) == 0)
 refp = _OPERAND (*refp, 0);
   same_p = same_type_for_tbaa (TREE_TYPE (*refp), type1);
-  /* If we couldn't compare types we have to bail out.  */
-  if (same_p == -1)
-return true;
-  else if (same_p == 1)
+  if (same_p == 1)
 {
   poly_int64 offadj, sztmp, msztmp;
   bool reverse;
@@ -827,26 +824,31 @@ aliasing_component_refs_p (tree ref1,
   offset1 -= offadj;
   return ranges_maybe_overlap_p (offset1, max_size1, offset2, max_size2);
 }
+
   /* If we didn't find a common base, try the other way around.  */
   refp = 
   while (handled_component_p (*refp)
 && same_type_for_tbaa (TREE_TYPE (*refp), type2) == 0)
 refp = _OPERAND (*refp, 0);
-  same_p = same_type_for_tbaa (TREE_TYPE (*refp), type2);
-  /* If we couldn't compare types we have to bail out.  */
-  if (same_p == -1)
-return true;
-  else if (same_p == 1)
+  same_p2 = same_type_for_tbaa (TREE_TYPE (*refp), type2);
+  if (same_p2 == 1)
 {
   poly_int64 offadj, sztmp, msztmp;
   bool reverse;
   get_ref_base_and_extent (*refp, , , , );
   offset1 -= offadj;
   get_ref_base_and_extent (base2, , , , );
   offset2 -= offadj;
-  return ranges_maybe_overlap_p (offset1, max_size1, offset2, max_size2);
+  return ranges_maybe_overlap_p (offset1, max_size1,
+offset2, max_size2);
 }
 
+  /* In the remaining test we assume that there is no overlapping type
+ at all.  So if we are unsure, we need to give up.  */
+  if (same_p == -1 || same_p2 == -1)
+return true;
+
   /* If we have two type access paths B1.path1 and B2.path2 they may
  only alias if either B1 is in B2.path2 or B2 is in B1.path1.
  But we can still have a path that goes B1.path1...B2.path2 with


Re: abstract out EH propagation cleanups

2019-05-07 Thread Richard Biener
On Tue, May 7, 2019 at 11:13 AM Aldy Hernandez  wrote:
>
> Hi.
>
> We seem to have numerous copies of the same EH propagation cleanups
> scattered throughout the compiler.  The attached patch moves all the
> logic into one class that allows for easy marking of statements and
> automatic cleanup once it goes out of scope.
>
> Tested on x86-64 Linux.
>
> OK for trunk? (*)

Ugh :/

First of all I don't like the fact that the actual cleanup is done
upon destructor execution.  Please make it explicit
and in the destructor assert that nothing is to be done.

Then I'm not sure this is a 1:1 transform since for example

@@ -1061,8 +1173,6 @@
substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
}

   gimple *old_stmt = stmt;
-  bool was_noreturn = (is_gimple_call (stmt)
-  && gimple_call_noreturn_p (stmt));

   /* Replace real uses in the statement.  */
   did_replace |= substitute_and_fold_engine->replace_uses_in (stmt);
@@ -1110,25 +1220,7 @@
substitute_and_fold_dom_walker::before_dom_children (basic_block bb)
   /* Now cleanup.  */
   if (did_replace)
{
...
+ fixups.record_change (old_stmt, stmt);

here we no longer can reliably determine whether old_stmt was noreturn since
we substitute into stmt itself.  It's no longer a correctness issue if
we do _not_
fixup_noreturn since we now have GF_CALL_CTRL_ALTERING, it's merely
an optimization issue.  So there may be no testcase for this (previously such
cases ICEd).

I'm also not sure I like to put all these (unrelated) things into a
single class,
it really also hides the details of what is performed immediately and what
delayed and what kind of changes - this makes understanding of pass
transforms hard.

Richard.

> Aldy
>
> (*) If this is too invasive for the period immediately following the
> re-opening of stage1, I can hold off the commit (if approved).


Re: [Bug libstdc++/90277] Debug Mode test failures

2019-05-07 Thread Jonathan Wakely

On 07/05/19 07:06 +0200, François Dumont wrote:

Hi

    I just prefer to make the tests implementation-agnostic using reserve.

    I check that without the patch to initiate the hashtable with 11 
buckets I reproduce the failures and that with this patch it is fine.


    PR libstdc++/90277
    * testsuite/23_containers/unordered_multiset/insert/24061-multiset.cc
    (test01): Reserve for number of insertions to avoid rehash during test.
    * testsuite/23_containers/unordered_multimap/insert/24061-multimap.cc
    (test01): Likewise.
    * testsuite/23_containers/unordered_multimap/insert/24061-multimap.cc
    (test01): Likewise.
    (test02): Likewise.
    (test03): Likewise.

    I plan to commit it this evening if not told otherwise.


Great, thanks.




Re: [PATCH] Implement LWG 2686, hash

2019-05-07 Thread Jonathan Wakely

On 07/05/19 11:05 +0200, Christophe Lyon wrote:

On Sat, 4 May 2019 at 16:36, Jonathan Wakely  wrote:


On 03/05/19 23:42 +0100, Jonathan Wakely wrote:
>On 23/03/17 17:49 +, Jonathan Wakely wrote:
>>On 12/03/17 13:16 +0100, Daniel Krügler wrote:
>>>The following is an *untested* patch suggestion, please verify.
>>>
>>>Notes: My interpretation is that hash<error_condition> should be
>>>defined outside of the _GLIBCXX_COMPATIBILITY_CXX0X block, please
>>>double-check that course of action.
>>
>>That's right.
>>
>>>I noticed that the preexisting hash<error_code> did directly refer to
>>>the private members of error_code albeit those have public access
>>>functions. For consistency I mimicked that existing style when
>>>implementing hash<error_condition>.
>>
>>I see no reason for that, so I've removed the friend declaration and
>>used the public member functions.
>
>I'm going to do the same for hash<error_code> too. It can also use the
>public members instead of being a friend.
>
>
>>Although this is a DR, I'm treating it as a new C++17 feature, so I've
>>adjusted the patch to only add the new specialization for C++17 mode.
>>We're too close to the GCC 7 release to be adding new things to the
>>default mode, even minor things like this. After GCC 7 is released we
>>can revisit it and decide if we want to enable it for all modes.
>
>We never revisited that, and it's still only enabled for C++17 and up.
>I guess that's OK, but we could enable it for C++11 and 14 on trunk
>if we want. Anybody care enough to argue for that?
>
>>Here's what I've tested and will be committing.
>>
>>
>
>>commit 90ca0fd91f5c65af370beb20af06bdca257aaf63
>>Author: Jonathan Wakely 
>>Date:   Thu Mar 23 11:47:39 2017 +
>>
>>   Implement LWG 2686, std::hash, for C++17
>>   2017-03-23  Daniel Kruegler  
>>  Implement LWG 2686, Why is std::hash specialized for error_code,
>>  but not error_condition?
>>  * include/std/system_error (hash): Define for C++17.
>>  * testsuite/20_util/hash/operators/size_t.cc (hash):
>>  Instantiate test for error_condition.
>>  * testsuite/20_util/hash/requirements/explicit_instantiation.cc
>>  (hash): Instantiate hash.
>>
>>diff --git a/libstdc++-v3/include/std/system_error 
b/libstdc++-v3/include/std/system_error
>>index 6775a6e..ec7d25f 100644
>>--- a/libstdc++-v3/include/std/system_error
>>+++ b/libstdc++-v3/include/std/system_error
>>@@ -373,14 +373,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>_GLIBCXX_END_NAMESPACE_VERSION
>>} // namespace
>>
>>-#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
>>-
>>#include 
>>
>>namespace std _GLIBCXX_VISIBILITY(default)
>>{
>>_GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>>+#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
>>  // DR 1182.
>>  /// std::hash specialization for error_code.
>>  template<>
>>@@ -394,12 +393,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
>>  }
>>};
>>+#endif // _GLIBCXX_COMPATIBILITY_CXX0X
>>+
>>+#if __cplusplus > 201402L
>>+  // DR 2686.
>>+  /// std::hash specialization for error_condition.
>>+  template<>
>>+struct hash<error_condition>
>>+: public __hash_base<size_t, error_condition>
>>+{
>>+  size_t
>>+  operator()(const error_condition& __e) const noexcept
>>+  {
>>+ const size_t __tmp = std::_Hash_impl::hash(__e.value());
>>+ return std::_Hash_impl::__hash_combine(__e.category(), __tmp);
>
>When I changed this from using __e._M_cat (as in Daniel's patch) to
>__e.category() I introduced a bug, because the former is a pointer to
>the error_category (and error_category objects are unique and so can
>be identified by their address) and the latter is the object itself,
>so we hash the bytes of an abstract base class instead of hashing the
>pointer to it. Oops.
>
>Patch coming up to fix that.

Here's the fix. Tested powerpc64le-linux, committed to trunk.

I'll backport this to 7, 8 and 9 as well.



Hi Jonathan,

Does the new test lack dg-require-filesystem-ts ?


It lacks it, because it doesn't use the filesystem library at all.


I'm seeing link failures on arm-eabi (using newlib):
Excess errors:
/libstdc++-v3/src/c++17/fs_ops.cc:806: undefined reference to `chdir'
/libstdc++-v3/src/c++17/fs_ops.cc:583: undefined reference to `mkdir'
/libstdc++-v3/src/c++17/fs_ops.cc:1134: undefined reference to `chmod'
/libstdc++-v3/src/c++17/../filesystem/ops-common.h:439: undefined
reference to `chmod'
/libstdc++-v3/src/c++17/fs_ops.cc:750: undefined reference to `pathconf'
/libstdc++-v3/src/c++17/fs_ops.cc:769: undefined reference to `getcwd'

Christophe


Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Jakub Jelinek
On Tue, May 07, 2019 at 09:55:21AM +0200, Jakub Jelinek wrote:
> On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
> > Will leave the "correctness check" for other folks
> > but the above is

BTW, as I wanted to be sure about the correctness, I wrote a simple program
(below).
And actually it seems that we could optimize the plus1 == plus2 cases
even if HONOR_SIGN_DEPENDENT_ROUNDING (type), because even in fesetround
(FE_DOWNWARD) mode the testcase prints the first two (in all other modes all
4).

So here is also an updated version of the patch:

2019-05-07  Jakub Jelinek  

PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

* gcc.dg/tree-ssa/pr90356-1.c: New test.
* gcc.dg/tree-ssa/pr90356-2.c: New test.
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.
* gcc.dg/tree-ssa/pr90356-5.c: New test.
* gcc.dg/tree-ssa/pr90356-6.c: New test.

--- gcc/match.pd.jj 2019-05-06 23:47:52.642628123 +0200
+++ gcc/match.pd2019-05-07 10:40:25.475136027 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math (for (X + 0.0) + 0.0 and (X - 0.0) - 0.0
+   even if -frounding-math).  For sNaNs the first operation would raise
+   exceptions but turn the result into qNaN, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
+(if (real_zerop (@1) && real_zerop (@2))
+ (with { bool inner_plus = ((inner_op == PLUS_EXPR)
+   ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+bool outer_plus
+  = ((outer_op == PLUS_EXPR)
+ ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+  (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) || outer_plus == inner_plus)
+   (if (outer_plus && !inner_plus)
+   (outer_op @0 @2)
+   (inner_op @0 @1))))))))
+
 /* Simplify x - x.
This is unsafe for certain floats even in non-IEEE formats.
In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj2019-05-07 
10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c   2019-05-07 10:34:07.270208201 
+0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros 
-fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c.jj2019-05-07 
10:34:07.270208201 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-2.c   2019-05-07 10:34:07.270208201 
+0200
@@ -0,0 +1,8 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fno-signaling-nans -fsigned-zeros 
-fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 12 "optimized" } } */
+
+#include "pr90356-1.c"
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c.jj2019-05-07 
10:34:07.271208185 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c   2019-05-07 11:00:50.345488636 
+0200
@@ -0,0 +1,15 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros 
-fdump-tree-optimized" } */
+/* { dg-final { 

abstract out EH propagation cleanups

2019-05-07 Thread Aldy Hernandez

Hi.

We seem to have numerous copies of the same EH propagation cleanups 
scattered throughout the compiler.  The attached patch moves all the 
logic into one class that allows for easy marking of statements and 
automatic cleanup once it goes out of scope.


Tested on x86-64 Linux.

OK for trunk? (*)

Aldy

(*) If this is too invasive for the period immediately following the 
re-opening of stage1, I can hold off the commit (if approved).
gcc/

	* tree-ssa-propagate.h (class propagate_cleanups): New.
	* tree-ssa-propagate.c (class substitute_and_fold_dom_walker):
	Remove references to stmts_to_fixup and need_eh_cleanup.
	(propagate_cleanups::~propagate_cleanups): New.
	(propagate_cleanups::record_eh_change): New.
	(propagate_mark_stmt_for_cleanup): New.
	(propagate_cleanup): New.
	(substitute_and_fold_dom_walker::before_dom_children): Adjust for
	propagate_cleanups class.
	(substitute_and_fold_engine::substitute_and_fold): Remove manual
	EH cleanups.
	* gimple-ssa-evrp.c (class evrp_dom_walker): Adjust for
	propagate_cleanups class.
	(evrp_dom_walker::before_dom_children): Same.
	(evrp_dom_walker::cleanup): Same.
	* tree-ssa-dom.c: Remove need_eh_cleanup and need_noreturn_fixup
	globals.
	(class domfixups): New.
	(class dom_opt_dom_walker): Adjust for propagate_cleanups class.
	(pass_dominator::execute): Same.
	(dom_opt_dom_walker::optimize_stmt): Same.
	* tree-ssa-forwprop.c (tidy_after_forward_propagate_addr): Remove.
	(forward_propagate_addr_expr_1): Call
	propagate_mark_stmt_for_cleanup instead of
	tidy_after_forward_propagate_addr.
	(pass_forwprop::execute): Adjust for propagate_cleanups class.
	* tree-ssa-sccvn.c (class eliminate_dom_walker): Same.
	(class rpo_elim): Same.
	(eliminate_dom_walker::eliminate_dom_walker): Same.
	(eliminate_dom_walker::~eliminate_dom_walker): Same.
	(eliminate_dom_walker::eliminate_stmt): Same.
	(eliminate_dom_walker::eliminate_cleanup): Same.
	(eliminate_with_rpo_vn): Same.

diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c
index 96da79bf028..ec3fa3a0918 100644
--- a/gcc/gimple-ssa-evrp.c
+++ b/gcc/gimple-ssa-evrp.c
@@ -73,11 +73,9 @@ public:
   evrp_range_analyzer (true),
   evrp_folder (evrp_range_analyzer.get_vr_values ())
 {
-  need_eh_cleanup = BITMAP_ALLOC (NULL);
 }
   ~evrp_dom_walker ()
 {
-  BITMAP_FREE (need_eh_cleanup);
 }
   virtual edge before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
@@ -85,9 +83,8 @@ public:
 
  private:
   DISABLE_COPY_AND_ASSIGN (evrp_dom_walker);
-  bitmap need_eh_cleanup;
-  auto_vec stmts_to_fixup;
   auto_vec stmts_to_remove;
+  propagate_cleanups fixups;
 
   class evrp_range_analyzer evrp_range_analyzer;
   class evrp_folder evrp_folder;
@@ -128,8 +125,6 @@ evrp_dom_walker::before_dom_children (basic_block bb)
   gimple *stmt = gsi_stmt (gsi);
   tree output = NULL_TREE;
   gimple *old_stmt = stmt;
-  bool was_noreturn = (is_gimple_call (stmt)
-			   && gimple_call_noreturn_p (stmt));
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 	{
@@ -190,26 +185,7 @@ evrp_dom_walker::before_dom_children (basic_block bb)
 	}
 
   if (did_replace)
-	{
-	  /* If we cleaned up EH information from the statement,
-	 remove EH edges.  */
-	  if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt))
-	bitmap_set_bit (need_eh_cleanup, bb->index);
-
-	  /* If we turned a not noreturn call into a noreturn one
-	 schedule it for fixup.  */
-	  if (!was_noreturn
-	  && is_gimple_call (stmt)
-	  && gimple_call_noreturn_p (stmt))
-	stmts_to_fixup.safe_push (stmt);
-
-	  if (gimple_assign_single_p (stmt))
-	{
-	  tree rhs = gimple_assign_rhs1 (stmt);
-	  if (TREE_CODE (rhs) == ADDR_EXPR)
-		recompute_tree_invariant_for_addr_expr (rhs);
-	}
-	}
+	fixups.record_change (old_stmt, stmt);
 }
 
   /* Visit BB successor PHI nodes and replace PHI args.  */
@@ -275,19 +251,6 @@ evrp_dom_walker::cleanup (void)
 	}
 }
 
-  if (!bitmap_empty_p (need_eh_cleanup))
-gimple_purge_all_dead_eh_edges (need_eh_cleanup);
-
-  /* Fixup stmts that became noreturn calls.  This may require splitting
- blocks and thus isn't possible during the dominator walk.  Do this
- in reverse order so we don't inadvertedly remove a stmt we want to
- fixup by visiting a dominating now noreturn call first.  */
-  while (!stmts_to_fixup.is_empty ())
-{
-  gimple *stmt = stmts_to_fixup.pop ();
-  fixup_noreturn_call (stmt);
-}
-
   evrp_folder.vr_values->cleanup_edges_and_switches ();
 }
 
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index b0d56fcf3e3..228f56035b1 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -89,11 +89,6 @@ class edge_info
 /* Track whether or not we have changed the control flow graph.  */
 static bool cfg_altered;
 
-/* Bitmap of blocks that have had EH statements cleaned.  We should
-   remove their dead edges eventually.  */
-static bitmap need_eh_cleanup;
-static vec 

Re: [PATCH] Implement LWG 2686, hash

2019-05-07 Thread Christophe Lyon
On Sat, 4 May 2019 at 16:36, Jonathan Wakely  wrote:
>
> On 03/05/19 23:42 +0100, Jonathan Wakely wrote:
> >On 23/03/17 17:49 +, Jonathan Wakely wrote:
> >>On 12/03/17 13:16 +0100, Daniel Krügler wrote:
> >>>The following is an *untested* patch suggestion, please verify.
> >>>
> >>>Notes: My interpretation is that hash<error_condition> should be
> >>>defined outside of the _GLIBCXX_COMPATIBILITY_CXX0X block, please
> >>>double-check that course of action.
> >>
> >>That's right.
> >>
> >>>I noticed that the preexisting hash<error_code> did directly refer to
> >>>the private members of error_code albeit those have public access
> >>>functions. For consistency I mimicked that existing style when
> >>>implementing hash<error_condition>.
> >>
> >>I see no reason for that, so I've removed the friend declaration and
> >>used the public member functions.
> >
> >I'm going to do the same for hash<error_code> too. It can also use the
> >public members instead of being a friend.
> >
> >
> >>Although this is a DR, I'm treating it as a new C++17 feature, so I've
> >>adjusted the patch to only add the new specialization for C++17 mode.
> >>We're too close to the GCC 7 release to be adding new things to the
> >>default mode, even minor things like this. After GCC 7 is released we
> >>can revisit it and decide if we want to enable it for all modes.
> >
> >We never revisited that, and it's still only enabled for C++17 and up.
> >I guess that's OK, but we could enable it for C++11 and 14 on trunk
> >if we want. Anybody care enough to argue for that?
> >
> >>Here's what I've tested and will be committing.
> >>
> >>
> >
> >>commit 90ca0fd91f5c65af370beb20af06bdca257aaf63
> >>Author: Jonathan Wakely 
> >>Date:   Thu Mar 23 11:47:39 2017 +
> >>
> >>   Implement LWG 2686, std::hash, for C++17
> >>   2017-03-23  Daniel Kruegler  
> >>  Implement LWG 2686, Why is std::hash specialized for error_code,
> >>  but not error_condition?
> >>  * include/std/system_error (hash): Define for C++17.
> >>  * testsuite/20_util/hash/operators/size_t.cc (hash):
> >>  Instantiate test for error_condition.
> >>  * testsuite/20_util/hash/requirements/explicit_instantiation.cc
> >>  (hash): Instantiate hash.
> >>
> >>diff --git a/libstdc++-v3/include/std/system_error 
> >>b/libstdc++-v3/include/std/system_error
> >>index 6775a6e..ec7d25f 100644
> >>--- a/libstdc++-v3/include/std/system_error
> >>+++ b/libstdc++-v3/include/std/system_error
> >>@@ -373,14 +373,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>_GLIBCXX_END_NAMESPACE_VERSION
> >>} // namespace
> >>
> >>-#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
> >>-
> >>#include 
> >>
> >>namespace std _GLIBCXX_VISIBILITY(default)
> >>{
> >>_GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>
> >>+#ifndef _GLIBCXX_COMPATIBILITY_CXX0X
> >>  // DR 1182.
> >>  /// std::hash specialization for error_code.
> >>  template<>
> >>@@ -394,12 +393,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>  return std::_Hash_impl::__hash_combine(__e._M_cat, __tmp);
> >>  }
> >>};
> >>+#endif // _GLIBCXX_COMPATIBILITY_CXX0X
> >>+
> >>+#if __cplusplus > 201402L
> >>+  // DR 2686.
> >>+  /// std::hash specialization for error_condition.
> >>+  template<>
> >>+struct hash
> >>+: public __hash_base
> >>+{
> >>+  size_t
> >>+  operator()(const error_condition& __e) const noexcept
> >>+  {
> >>+ const size_t __tmp = std::_Hash_impl::hash(__e.value());
> >>+ return std::_Hash_impl::__hash_combine(__e.category(), __tmp);
> >
> >When I changed this from using __e._M_cat (as in Daniel's patch) to
> >__e.category() I introduced a bug, because the former is a pointer to
> >the error_category (and error_category objects are unique and so can
> >be identified by their address) and the latter is the object itself,
> >so we hash the bytes of an abstract base class instead of hashing the
> >pointer to it. Oops.
> >
> >Patch coming up to fix that.
>
> Here's the fix. Tested powerpc64le-linux, committed to trunk.
>
> I'll backport this to 7, 8 and 9 as well.
>

Hi Jonathan,

Does the new test lack dg-require-filesystem-ts ?

I'm seeing link failures on arm-eabi (using newlib):
Excess errors:
/libstdc++-v3/src/c++17/fs_ops.cc:806: undefined reference to `chdir'
/libstdc++-v3/src/c++17/fs_ops.cc:583: undefined reference to `mkdir'
/libstdc++-v3/src/c++17/fs_ops.cc:1134: undefined reference to `chmod'
/libstdc++-v3/src/c++17/../filesystem/ops-common.h:439: undefined
reference to `chmod'
/libstdc++-v3/src/c++17/fs_ops.cc:750: undefined reference to `pathconf'
/libstdc++-v3/src/c++17/fs_ops.cc:769: undefined reference to `getcwd'

Christophe


[PATCH] Fix PR90369

2019-05-07 Thread Richard Biener


The following fixes duplicate temporary file usage by lto-wrapper
with -save-temps.

Bootstrapped / tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-05-07  Richard Biener  

PR lto/90369
* lto-wrapper.c (debug_objcopy): Use the original filename
including archive offset for the filename used for -save-temps.

Index: gcc/lto-wrapper.c
===
--- gcc/lto-wrapper.c   (revision 270909)
+++ gcc/lto-wrapper.c   (working copy)
@@ -1044,6 +1044,7 @@ debug_objcopy (const char *infile, bool
   int err;
 
   const char *p;
+  const char *orig_infile = infile;
   off_t inoff = 0;
   long loffset;
   int consumed;
@@ -1080,9 +1081,9 @@ debug_objcopy (const char *infile, bool
 
   if (save_temps)
 {
-  outfile = (char *) xmalloc (strlen (infile)
+  outfile = (char *) xmalloc (strlen (orig_infile)
  + sizeof (".debug.temp.o") + 1);
-  strcpy (outfile, infile);
+  strcpy (outfile, orig_infile);
   strcat (outfile, ".debug.temp.o");
 }
   else


Re: [PATCH] PR fortran/90166 -- Add check for module prefix

2019-05-07 Thread Dominique d'Humières
Hi Steve,

> Ping.

AFAICT this has been committed as revision r270495.

Cheers,

Dominique


Re: [Patch, fortran] ISO_Fortran_binding PRs 90093, 90352 & 90355

2019-05-07 Thread Dominique d'Humières
Hi Paul,

With your patch, I see

FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O   (test for errors, line 8)
FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O   (test for errors, line 9)
FAIL: gfortran.dg/iso_c_binding_char_1.f90   -O  (test for excess errors)

This is due to a bad location of the errors:

/opt/gcc/work/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90:7:16:

7 | subroutine bar(c,d) BIND(C)
  |1
Error: Character argument 'c' at (1) must be length 1 because procedure 'bar' 
is BIND(C)
/opt/gcc/work/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90:7:18:

7 | subroutine bar(c,d) BIND(C)
  |  1
Error: Character argument 'd' at (1) must be length 1 because procedure 'bar' 
is BIND(C)

TIA

Dominique

Re: [libphobos, build] Enable libphobos on Solaris 11/x86

2019-05-07 Thread Iain Buclaw
On Tue, 7 May 2019 at 10:15, Rainer Orth  wrote:
>
> Hi Iain,
>
> > On Tue, 19 Feb 2019 at 13:58, Rainer Orth  
> > wrote:
> >>
> >> Hi Iain,
> >>
> >> >> Thanks.  This will have to wait for
> >> >>
> >> >> [libphobos] Use sections_elf_shared.d on Solaris 11.5 (PR 
> >> >> d/88150)
> >> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01661.html
> >> >
> >> > I'll make a fork of sections support tonight, as that seems to be the
> >> > right way forwards.
> >> >
> >> > Other parts will need upstreaming, I can do that as well.
> >>
> >> that would be great, thanks.
> >>
> >> >> and
> >> >> [libphobos] Work around lack of dlpi_tls_modid before Solaris 
> >> >> 11.5
> >> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01664.html
> >> >>
> >> >
> >> > Johannes has already commented there, and he is right about needing a
> >> > way to get tls data from DSOs.
> >>
> >> Understood.  Maybe I can find a way to generalize the hack derived from
> >> sections_ldc.d to TLS segments outside the executable.
> >>
> >> >> of course.  Maybe even
> >> >>
> >> >> [libphobos] Work around Solaris ld bug linking __tls_get_addr on
> >> >> 64-bit x86
> >> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01663.html
> >> >>
> >> >> will be needed depending on whether a proper ld fix makes it into
> >> >> Solaris 11.5 or not.
> >> >
> >> > I'm not sure about this, but haven't looked at it properly just yet.
> >>
> >> I don't think you need concern yourself with this very much: it's just a
> >> hack around a Solaris ld bug, suggested by the Solaris linker engineers
> >> for the moment.  Once the dlpi_tls_modid patch lands in Solaris 11.5
> >> (this week or next), I'll ask if they see a chance to have that bug
> >> fixed in time for the Solaris 11.5 release.  If so, nobody besides
> >> myself will ever be exposed to this issue.
> >>
> >
> > I've just given building gcc a try in an OpenIndiana VM, and get the 
> > following:
> >
> > ld: fatal: option -z has illegal argument 'relax=transtls'
> > ld: fatal: flags processing errors
> > collect2: error: ld returned 1 exit status
> >
> > $ ld --version
> > ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1756 
> > (illumos)
> >
> > My fatal mistake of course was not configuring with
> > --with-ld=/usr/gnu/bin/ld, however it is notable that not all Solaris
> > linkers support this relax=transtls option.
>
> I finally got back to this and did some experiments of my own: even the
> latest Illumos ld doesn't implement -z relax=transtls, neither has it
> fixed the underlying bug, so it's useless for 64-bit Illumos/x86.
>
> The following patch checks for those conditions (ld support for -z
> relax=transtls or gld in use) and only enables libphobos if they are
> met.
>
> I had to move the whole enable_libphobos/LIBPHOBOS_SUPPORTED block down
> in configure.ac so it's able to use the results of the augmented
> DRUNTIME_OS_LINK_SPEC.
>
> While I didn't test the patch on Illumos (gcc builds inside a VM are
> slow), I tried it on Solaris 10/x86 with gas/ld and gas/gld (and an
> improved version of the patch for PR d/88238) where it behaved as
> expected.  I've also included a previous version in a Solaris 11/x86
> bootstrap.
>

OK, thanks for having a look into it.

-- 
Iain


Re: [libphobos, build] Enable libphobos on Solaris 11/x86

2019-05-07 Thread Rainer Orth
Hi Iain,

> On Tue, 19 Feb 2019 at 13:58, Rainer Orth  
> wrote:
>>
>> Hi Iain,
>>
>> >> Thanks.  This will have to wait for
>> >>
>> >> [libphobos] Use sections_elf_shared.d on Solaris 11.5 (PR d/88150)
>> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01661.html
>> >
>> > I'll make a fork of sections support tonight, as that seems to be the
>> > right way forwards.
>> >
>> > Other parts will need upstreaming, I can do that as well.
>>
>> that would be great, thanks.
>>
>> >> and
>> >> [libphobos] Work around lack of dlpi_tls_modid before Solaris 11.5
>> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01664.html
>> >>
>> >
>> > Johannes has already commented there, and he is right about needing a
>> > way to get tls data from DSOs.
>>
>> Understood.  Maybe I can find a way to generalize the hack derived from
>> sections_ldc.d to TLS segments outside the executable.
>>
>> >> of course.  Maybe even
>> >>
>> >> [libphobos] Work around Solaris ld bug linking __tls_get_addr on
>> >> 64-bit x86
>> >> https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01663.html
>> >>
>> >> will be needed depending on whether a proper ld fix makes it into
>> >> Solaris 11.5 or not.
>> >
>> > I'm not sure about this, but haven't looked at it properly just yet.
>>
>> I don't think you need concern yourself with this very much: it's just a
>> hack around a Solaris ld bug, suggested by the Solaris linker engineers
>> for the moment.  Once the dlpi_tls_modid patch lands in Solaris 11.5
>> (this week or next), I'll ask if they see a chance to have that bug
>> fixed in time for the Solaris 11.5 release.  If so, nobody besides
>> myself will ever be exposed to this issue.
>>
>
> I've just given building gcc a try in an OpenIndiana VM, and get the 
> following:
>
> ld: fatal: option -z has illegal argument 'relax=transtls'
> ld: fatal: flags processing errors
> collect2: error: ld returned 1 exit status
>
> $ ld --version
> ld: Software Generation Utilities - Solaris Link Editors: 5.11-1.1756 
> (illumos)
>
> My fatal mistake of course was not configuring with
> --with-ld=/usr/gnu/bin/ld, however it is notable that not all Solaris
> linkers support this relax=transtls option.

I finally got back to this and did some experiments of my own: even the
latest Illumos ld doesn't implement -z relax=transtls, neither has it
fixed the underlying bug, so it's useless for 64-bit Illumos/x86.

The following patch checks for those conditions (ld support for -z
relax=transtls or gld in use) and only enables libphobos if they are
met.

I had to move the whole enable_libphobos/LIBPHOBOS_SUPPORTED block down
in configure.ac so it's able to use the results of the augmented
DRUNTIME_OS_LINK_SPEC.

While I didn't test the patch on Illumos (gcc builds inside a VM are
slow), I tried it on Solaris 10/x86 with gas/ld and gas/gld (and an
improved version of the patch for PR d/88238) where it behaved as
expected.  I've also included a previous version in a Solaris 11/x86
bootstrap.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2019-04-25  Rainer Orth  

* m4/druntime/os.m4 (DRUNTIME_OS_LINK_SPEC): Only use -z
relax=transtls if linker supports it.
* configure.ac (enable_libphobos, LIBPHOBOS_SUPPORTED): Move down.
(x86_64-*-solaris2.* | i?86-*-solaris2.*): Only
mark supported with either gld or ld -z relax=transtls.
* configure: Regenerate.

# HG changeset patch
# Parent  c76abee7081bb8c24b2ca6f55749d092ae8cf3ea
Check if Solaris ld supports -z relax=transtls

diff --git a/libphobos/configure.ac b/libphobos/configure.ac
--- a/libphobos/configure.ac
+++ b/libphobos/configure.ac
@@ -113,42 +113,6 @@ AC_SUBST(phobos_compiler_shared_flag)
 lt_prog_compiler_pic_D="$phobos_compiler_shared_flag"
 pic_mode='default'
 
-AC_MSG_CHECKING([for --enable-libphobos])
-AC_ARG_ENABLE(libphobos,
-  [AS_HELP_STRING([--enable-libphobos], [Enable libphobos])])
-AC_MSG_RESULT($enable_libphobos)
-
-# See if supported.
-unset LIBPHOBOS_SUPPORTED
-AC_MSG_CHECKING([for host support for libphobos])
-. ${srcdir}/configure.tgt
-case ${host} in
-  x86_64-*-solaris2.* | i?86-*-solaris2.*)
-# libphobos doesn't compile with the Solaris/x86 assembler due to a
-# relatively low linelength limit.
-as_prog=`$CC -print-prog-name=as`
-if test -n "$as_prog" && $as_prog -v /dev/null 2>&1 | grep GNU > /dev/null 2>&1; then
-  druntime_cv_use_gas=yes;
-else
-  druntime_cv_use_gas=no;
-fi
-rm -f a.out
-if test x$druntime_cv_use_gas = xno; then
-  LIBPHOBOS_SUPPORTED=no
-fi
-;;
-esac
-AC_MSG_RESULT($LIBPHOBOS_SUPPORTED)
-
-# Decide if it's usable.
-case $LIBPHOBOS_SUPPORTED:$enable_libphobos in
-*:no)  use_libphobos=no  ;;
-*:yes) use_libphobos=yes ;;
-yes:*) use_libphobos=yes ;;
-*:*)   use_libphobos=no  ;;
-esac
-AM_CONDITIONAL(ENABLE_LIBPHOBOS, test 

Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Jakub Jelinek
On Tue, May 07, 2019 at 09:48:13AM +0200, Richard Biener wrote:
> Will leave the "correctness check" for other folks but the above is
> better written as
> 
> +   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
> +(if (real_zerop (@1)
> + && real_zerop (@2)
> 
> because that gets code-generated better.  Btw, for -fsignaling-nans

Ok, will change.  I want to introduce uniform_real_cst_p similar to
uniform_integer_cst_p incrementally and then it will change again.

> can we have a literal sNaN?  Then you need :c on the inner_op since
> I'm not sure we canonicalize to sNaN + 0.0 rather than 0.0 + sNaN.

I had :c on both initially, but that doesn't compile, because MINUS_EXPR
is not commutative.  And I wanted to avoid writing 4 patterns instead of 1.

> Maybe not worth optimizing though (since we rule out -frounding-math
> a similar case there doesn't need to be considered).
> 
> > +&& HONOR_SIGNED_ZEROS (element_mode (type))
> > +&& !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))
> 
> You can write HONOR_SIGNED_ZEROS (type) here for brevity.

Ok, will do (and change it then in fold_real_zero_addition_p as well).

Jakub


Re: [PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Richard Biener
On Tue, 7 May 2019, Jakub Jelinek wrote:

> Hi!
> 
> fold_real_zero_addition_p will fold x + (-0.0) or x - 0.0 to x
> when not -frounding-math, but not the rest of the options when
> -fsigned-zeros, and not when -fsignaling-nans.
> If we have (x + 0.0) + 0.0, we can fold that to just x + 0.0 even
> when honoring signed zeros, and IMNSHO even when honoring sNaNs,
> of course unless -frounding-math, then we can't do anything.
> For x other than 0.0, -0.0 and sNaN it is obviously correct, for sNaN
> sNaN + 0.0 will raise an exception and turn the result into qNaN, which
> will not raise further exception on the second addition, so IMHO it is ok
> too (unless we want to say special case -fnon-call-exceptions and the
> exception handler changing the result back to sNaN and expecting yet another
> exception).  For 0.0/-0.0 if we can assume rounding other than towards
> negative infinity, the results are:
>   x x
> (0.0 + 0.0) + 0.0 = 0.0 = (0.0 + 0.0)
> (-0.0 + 0.0) + 0.0 = 0.0 = (-0.0 + 0.0)
> (0.0 - 0.0) - 0.0 = 0.0 = (0.0 - 0.0)
> (-0.0 - 0.0) - 0.0 = -0.0 = (-0.0 - 0.0)
> (0.0 + 0.0) - 0.0 = 0.0 = (0.0 + 0.0)
> (-0.0 + 0.0) - 0.0 = 0.0 = (-0.0 + 0.0)
> For the above ones, the two operations are always equal to the inner operation
> (0.0 - 0.0) + 0.0 = 0.0 = 0.0 + 0.0
> (-0.0 - 0.0) + 0.0 = 0.0 = -0.0 + 0.0
> For the above cases, the two operations are always equal to the outer 
> operation
> 
> If it is y + (-0.0), it is equivalent to y - 0.0 and if it is y - (-0.0),
> it is equivalent to y + 0.0 in the above.
> 
> For rounding towards negative infinity, 0.0 - 0.0 is -0.0 rather than 0.0
> and so some of the above equivalencies are not true.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2019-05-07  Jakub Jelinek  
> 
>   PR tree-optimization/90356
>   * match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
> 
>   * gcc.dg/tree-ssa/pr90356-1.c: New test.
>   * gcc.dg/tree-ssa/pr90356-2.c: New test.
>   * gcc.dg/tree-ssa/pr90356-3.c: New test.
>   * gcc.dg/tree-ssa/pr90356-4.c: New test.
> 
> --- gcc/match.pd.jj   2019-05-03 15:22:07.370401908 +0200
> +++ gcc/match.pd  2019-05-06 11:26:04.701663020 +0200
> @@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
>   (if (fold_real_zero_addition_p (type, @1, 1))
>(non_lvalue @0)))
>  
> +/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
> +   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
> +   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
> +   if not -frounding-math.  For sNaNs the first operation would raise
> +   exceptions but turn the result into qNan, so the second operation
> +   would not raise it.   */
> +(for inner_op (plus minus)
> + (for outer_op (plus minus)
> +  (simplify
> +   (outer_op (inner_op @0 real_zerop@1) real_zerop@2)
> +(if (TREE_CODE (@1) == REAL_CST
> +  && TREE_CODE (@2) == REAL_CST

Will leave the "correctness check" for other folks but the above is
better written as

+   (outer_op (inner_op @0 REAL_CST@1) REAL_CST@2)
+(if (real_zerop (@1)
+ && real_zerop (@2)

because that gets code-generated better.  Btw, for -fsignaling-nans
can we have a literal sNaN?  Then you need :c on the inner_op since
I'm not sure we canonicalize to sNaN + 0.0 rather than 0.0 + sNaN.
Maybe not worth optimizing though (since we rule out -frounding-math
a similar case there doesn't need to be considered).

> +  && HONOR_SIGNED_ZEROS (element_mode (type))
> +  && !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))

You can write HONOR_SIGNED_ZEROS (type) here for brevity.

> + (with { bool plus1 = ((inner_op == PLUS_EXPR)
> +^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
> +  bool plus2 = ((outer_op == PLUS_EXPR)
> +^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
> +  (if (plus2 && !plus1)
> +   (outer_op @0 @2)
> +   (inner_op @0 @1)))
> +
>  /* Simplify x - x.
> This is unsafe for certain floats even in non-IEEE formats.
> In IEEE, it is unsafe because it does wrong for NaNs.
> --- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj  2019-05-06 
> 11:39:58.998288472 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c 2019-05-06 11:42:53.597489688 
> +0200
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/90356 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros 
> -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" 
> } } */
> +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } 
> */
> +/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
> +
> +double f1 (double x) { return (x + 0.0) + 0.0; }
> +double f2 (double y) { return (y + (-0.0)) + (-0.0); }
> +double f3 (double y) { return (y - 0.0) - 0.0; }
> +double f4 (double x) { return (x - (-0.0)) - 

Re: Fix Solaris bootstrap: lto-common.c, lto-dump.c format mismatches

2019-05-07 Thread Richard Biener
On Tue, May 7, 2019 at 9:32 AM Rainer Orth  
wrote:
>
> Hi Richard,
>
> > On Mon, May 6, 2019 at 10:39 PM Jakub Jelinek  wrote:
> >>
> >> On Mon, May 06, 2019 at 08:46:05PM +0200, Richard Biener wrote:
> >> > >Fixed as follows.  i386-pc-solaris2.11 bootstrap has completed with
> >> > >this
> >> > >patch, sparc-sun-solaris2.11 is running the testsuite and
> >> > >x86_64-pc-linux-gnu is building the runtime libs.
> >> > >
> >> > >Ok for mainline?
> >> >
> >> > Can you use the PRI* format macros to match the types instead?
> >>
> >> Is that sufficiently portable though?
> >> I mean, for PRI[diouxX]64 we redefine those macros in hwint.h if needed.
> >> But we don't have anything similar for PRI[diouxX]PTR if inttypes.h
> >> is not available, and for size_t there isn't even any PRI* macro at all.
> >
> > Use those that hwint.h provides - casting the value should be done as a last
> > resort.  Adding PRI[diouxX]PTR macros in hwint.h might be useful, I merely
> > added those that I wanted to use.
> >
> > True, size_t is always a problem :/  Having something in hwint.h would
> > be useful though - I see the C standard is lacking here.
>
> this is what I bootstrapped successfully last night on
> i386-pc-solaris2.11 and x86_64-pc-linux-gnu.  I didn't feel like adding
> PRI?PTR fallback definitions for the single use of intptr_t, though, and
> am not really sure this is an improvement over my original patch.

Hmm, indeed.  I think in the end some printing support for size_t
is needed.  The use of intptr_t itself is probably somewhat bogus
and off_t should have been used (with the very same issue of course).
Or both should have been [u]int64_t from the start.  Anyway, something
for the LTO folks to clean up.

Patch is OK.

Richard.

> Rainer
>
> --
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University
>
>
> 2019-05-06  Rainer Orth  
>
> * lto-common.c (lto_file_read): Print section->start as int64_t,
> section->len as uint64_t.
> * lto-dump.c (symbol_entry::dump): Print sz as uint64_t.
>


Re: Fix Solaris bootstrap: lto-common.c, lto-dump.c format mismatches

2019-05-07 Thread Rainer Orth
Hi Richard,

> On Mon, May 6, 2019 at 10:39 PM Jakub Jelinek  wrote:
>>
>> On Mon, May 06, 2019 at 08:46:05PM +0200, Richard Biener wrote:
>> > >Fixed as follows.  i386-pc-solaris2.11 bootstrap has completed with
>> > >this
>> > >patch, sparc-sun-solaris2.11 is running the testsuite and
>> > >x86_64-pc-linux-gnu is building the runtime libs.
>> > >
>> > >Ok for mainline?
>> >
>> > Can you use the PRI* format macros to match the types instead?
>>
>> Is that sufficiently portable though?
>> I mean, for PRI[diouxX]64 we redefine those macros in hwint.h if needed.
>> But we don't have anything similar for PRI[diouxX]PTR if inttypes.h
>> is not available, and for size_t there isn't even any PRI* macro at all.
>
> Use those that hwint.h provides - casting the value should be done as a last
> resort.  Adding PRI[diouxX]PTR macros in hwint.h might be useful, I merely
> added those that I wanted to use.
>
> True, size_t is always a problem :/  Having something in hwint.h would
> be useful though - I see the C standard is lacking here.

this is what I bootstrapped successfully last night on
i386-pc-solaris2.11 and x86_64-pc-linux-gnu.  I didn't feel like adding
PRI?PTR fallback definitions for the single use of intptr_t, though, and
am not really sure this is an improvement over my original patch.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2019-05-06  Rainer Orth  

* lto-common.c (lto_file_read): Print section->start as int64_t,
section->len as uint64_t.
* lto-dump.c (symbol_entry::dump): Print sz as uint64_t.

# HG changeset patch
# Parent  0694f1a35e195359100e42257e2e292a930a8829
Fix Solaris bootstrap: lto-common.c, lto-dump.c format mismatches

diff --git a/gcc/lto/lto-common.c b/gcc/lto/lto-common.c
--- a/gcc/lto/lto-common.c
+++ b/gcc/lto/lto-common.c
@@ -2111,8 +2111,9 @@ lto_file_read (lto_file *file, FILE *res
 fprintf (stdout, "\nLTO Object Name: %s\n", file->filename);
 fprintf (stdout, "\nNo.OffsetSize   Section Name\n\n");
 for (section = section_list.first; section != NULL; section = section->next)
-  fprintf (stdout, "%2d %8ld %8ld   %s\n",
-	   ++i, section->start, section->len, section->name);
+  fprintf (stdout, "%2d %8" PRId64 " %8" PRIu64 "   %s\n",
+	   ++i, (int64_t) section->start, (uint64_t) section->len,
+	   section->name);
   }
 
   /* Find all sub modules in the object and put their sections into new hash
diff --git a/gcc/lto/lto-dump.c b/gcc/lto/lto-dump.c
--- a/gcc/lto/lto-dump.c
+++ b/gcc/lto/lto-dump.c
@@ -60,7 +60,8 @@ struct symbol_entry
 const char *type_name = node->get_symtab_type_string ();
 const char *visibility = node->get_visibility_string ();
 size_t sz = get_size ();
-printf ("%s  %s  %4lu  %s  ", type_name, visibility, sz, name);
+printf ("%s  %s  %4" PRIu64 "  %s  ", type_name, visibility, (uint64_t) sz,
+	name);
   }
 };
 


Re: [Patch] Fix ix86_expand_sse_comi_round (PR Target/89750, PR Target/86444)

2019-05-07 Thread Hongtao Liu
On Tue, May 7, 2019 at 3:03 PM Jakub Jelinek  wrote:
>
> On Tue, May 07, 2019 at 01:38:49PM +0800, Hongtao Liu wrote:
> > +2019-05-06  H.J. Lu  
> > + Hongtao Liu  
> > +
> > + PR Target/89750
> > + PR Target/86444
>
> target, not Target.  Various people handle these in various scripts,
> so it is better to use consistent and exact spelling of the categories.
>
> Jakub

OK, thank you for the reminder.

-- 
BR,
Hongtao


Re: Fix Solaris bootstrap: lto-common.c, lto-dump.c format mismatches

2019-05-07 Thread Richard Biener
On Mon, May 6, 2019 at 10:39 PM Jakub Jelinek  wrote:
>
> On Mon, May 06, 2019 at 08:46:05PM +0200, Richard Biener wrote:
> > >Fixed as follows.  i386-pc-solaris2.11 bootstrap has completed with
> > >this
> > >patch, sparc-sun-solaris2.11 is running the testsuite and
> > >x86_64-pc-linux-gnu is building the runtime libs.
> > >
> > >Ok for mainline?
> >
> > Can you use the PRI* format macros to match the types instead?
>
> Is that sufficiently portable though?
> I mean, for PRI[diouxX]64 we redefine those macros in hwint.h if needed.
> But we don't have anything similar for PRI[diouxX]PTR if inttypes.h
> is not available, and for size_t there isn't even any PRI* macro at all.

Use those that hwint.h provides - casting the value should be done as a last
resort.  Adding PRI[diouxX]PTR macros in hwint.h might be useful, I merely
added those that I wanted to use.

True, size_t is always a problem :/  Having something in hwint.h would
be useful though - I see the C standard is lacking here.

Richard.

> Jakub


[PATCH] Fold (x + 0.0) + 0.0 to x + 0.0 (PR tree-optimization/90356)

2019-05-07 Thread Jakub Jelinek
Hi!

fold_real_zero_addition_p will fold x + (-0.0) or x - 0.0 to x
when not -frounding-math, but not the rest of the options when
-fsigned-zeros, and not when -fsignaling-nans.
If we have (x + 0.0) + 0.0, we can fold that to just x + 0.0 even
when honoring signed zeros, and IMNSHO even when honoring sNaNs,
of course unless -frounding-math, then we can't do anything.
For x other than 0.0, -0.0 and sNaN it is obviously correct, for sNaN
sNaN + 0.0 will raise an exception and turn the result into qNaN, which
will not raise further exception on the second addition, so IMHO it is ok
too (unless we want to say special case -fnon-call-exceptions and the
exception handler changing the result back to sNaN and expecting yet another
exception).  For 0.0/-0.0 if we can assume rounding other than towards
negative infinity, the results are:
  x x
(0.0 + 0.0) + 0.0 = 0.0 = (0.0 + 0.0)
(-0.0 + 0.0) + 0.0 = 0.0 = (-0.0 + 0.0)
(0.0 - 0.0) - 0.0 = 0.0 = (0.0 - 0.0)
(-0.0 - 0.0) - 0.0 = -0.0 = (-0.0 - 0.0)
(0.0 + 0.0) - 0.0 = 0.0 = (0.0 + 0.0)
(-0.0 + 0.0) - 0.0 = 0.0 = (-0.0 + 0.0)
For the above ones, the two operations are always equal to the inner operation
(0.0 - 0.0) + 0.0 = 0.0 = 0.0 + 0.0
(-0.0 - 0.0) + 0.0 = 0.0 = -0.0 + 0.0
For the above cases, the two operations are always equal to the outer operation

If it is y + (-0.0), it is equivalent to y - 0.0 and if it is y - (-0.0),
it is equivalent to y + 0.0 in the above.

For rounding towards negative infinity, 0.0 - 0.0 is -0.0 rather than 0.0
and so some of the above equivalencies are not true.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-05-07  Jakub Jelinek  

PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.

* gcc.dg/tree-ssa/pr90356-1.c: New test.
* gcc.dg/tree-ssa/pr90356-2.c: New test.
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.

--- gcc/match.pd.jj 2019-05-03 15:22:07.370401908 +0200
+++ gcc/match.pd2019-05-06 11:26:04.701663020 +0200
@@ -152,6 +152,28 @@ (define_operator_list COND_TERNARY
  (if (fold_real_zero_addition_p (type, @1, 1))
   (non_lvalue @0)))
 
+/* Even if the fold_real_zero_addition_p can't simplify X + 0.0
+   into X, we can optimize (X + 0.0) + 0.0 or (X + 0.0) - 0.0
+   or (X - 0.0) + 0.0 into X + 0.0 and (X - 0.0) - 0.0 into X - 0.0
+   if not -frounding-math.  For sNaNs the first operation would raise
+   exceptions but turn the result into qNan, so the second operation
+   would not raise it.   */
+(for inner_op (plus minus)
+ (for outer_op (plus minus)
+  (simplify
+   (outer_op (inner_op @0 real_zerop@1) real_zerop@2)
+(if (TREE_CODE (@1) == REAL_CST
+&& TREE_CODE (@2) == REAL_CST
+&& HONOR_SIGNED_ZEROS (element_mode (type))
+&& !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)))
+ (with { bool plus1 = ((inner_op == PLUS_EXPR)
+  ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@1)));
+bool plus2 = ((outer_op == PLUS_EXPR)
+  ^ REAL_VALUE_MINUS_ZERO (TREE_REAL_CST (@2))); }
+  (if (plus2 && !plus1)
+   (outer_op @0 @2)
+   (inner_op @0 @1)))
+
 /* Simplify x - x.
This is unsafe for certain floats even in non-IEEE formats.
In IEEE, it is unsafe because it does wrong for NaNs.
--- gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c.jj2019-05-06 
11:39:58.998288472 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c   2019-05-06 11:42:53.597489688 
+0200
@@ -0,0 +1,23 @@
+/* PR tree-optimization/90356 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros 
-fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */
+
+double f1 (double x) { return (x + 0.0) + 0.0; }
+double f2 (double y) { return (y + (-0.0)) + (-0.0); }
+double f3 (double y) { return (y - 0.0) - 0.0; }
+double f4 (double x) { return (x - (-0.0)) - (-0.0); }
+double f5 (double x) { return (x + 0.0) - 0.0; }
+double f6 (double x) { return (x + (-0.0)) - (-0.0); }
+double f7 (double x) { return (x - 0.0) + 0.0; }
+double f8 (double x) { return (x - (-0.0)) + (-0.0); }
+double f9 (double x) { double t = x + 0.0; return t + 0.0; }
+double f10 (double y) { double t = y + (-0.0); return t + (-0.0); }
+double f11 (double y) { double t = y - 0.0; return t - 0.0; }
+double f12 (double x) { double t = x - (-0.0); return t - (-0.0); }
+double f13 (double x) { double t = x + 0.0; return t - 0.0; }
+double f14 (double x) { double t = x + (-0.0); return t - (-0.0); }
+double f15 (double x) { double t = x - 0.0; return t + 0.0; }
+double f16 (double x) { double t = x - (-0.0); return t + (-0.0); }
--- 

Re: [Patch] Fix ix86_expand_sse_comi_round (PR Target/89750, PR Target/86444)

2019-05-07 Thread Jakub Jelinek
On Tue, May 07, 2019 at 01:38:49PM +0800, Hongtao Liu wrote:
> +2019-05-06  H.J. Lu  
> + Hongtao Liu  
> +
> + PR Target/89750
> + PR Target/86444

target, not Target.  Various people handle these in various scripts,
so it is better to use consistent and exact spelling of the categories.

Jakub


Re: Enable BF16 support (Please ignore my former email)

2019-05-07 Thread Hongtao Liu
On Wed, Apr 17, 2019 at 7:14 PM Uros Bizjak  wrote:
>
> On Wed, Apr 17, 2019 at 1:03 PM Uros Bizjak  wrote:
> >
> > On Wed, Apr 17, 2019 at 12:29 PM Hongtao Liu  wrote:
> > >
> > > On Fri, Apr 12, 2019 at 11:18 PM H.J. Lu  wrote:
> > > >
> > > > On Fri, Apr 12, 2019 at 3:19 AM Uros Bizjak  wrote:
> > > > >
> > > > > On Fri, Apr 12, 2019 at 11:03 AM Hongtao Liu  
> > > > > wrote:
> > > > > >
> > > > > > On Fri, Apr 12, 2019 at 3:30 PM Uros Bizjak  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Fri, Apr 12, 2019 at 9:09 AM Liu, Hongtao 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > Hi :
> > > > > > > > > This patch enables support for bfloat16, which 
> > > > > > > > > will be in the future Cooper Lake.  Please refer to 
> > > > > > > > https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
> > > > > > > > for more details about BF16.
> > > > > > > >
> > > > > > > > > There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, 
> > > > > > > > > VCVTNEPS2BF16 and VDPBF16PS, which are Vector 
> > > > > > > > > Neural Network Instructions supporting:
> > > > > > > >
> > > > > > > > -   VCVTNE2PS2BF16: Convert Two Packed Single Data to One 
> > > > > > > > Packed BF16 Data.
> > > > > > > > -   VCVTNEPS2BF16: Convert Packed Single Data to Packed 
> > > > > > > > BF16 Data.
> > > > > > > > -   VDPBF16PS: Dot Product of BF16 Pairs Accumulated into 
> > > > > > > > Packed Single Precision.
> > > > > > > >
> > > > > > > > Since only BF16 intrinsics are supported, we treat it as HI for 
> > > > > > > > simplicity.
> > > > > > >
> > > > > > > I think it was a mistake declaring cvtps2ph and cvtph2ps using 
> > > > > > > HImode
> > > > > > > instead of HFmode. Is there a compelling reason not to introduce
> > > > > > > corresponding bf16_format supporting infrastructure and declare 
> > > > > > > these
> > > > > > > intrinsics using half-binary (HBmode ?) mode instead?
> > > > > > >
> > > > > > > Uros.
> > > > > >
> > > > > > > Bfloat16 isn't an IEEE standard format, which is what we want to reserve HFmode for.
> > > > >
> > > > > True.
> > > > >
> > > > > > The IEEE 754 standard specifies a binary16 as having the following 
> > > > > > format:
> > > > > > Sign bit: 1 bit
> > > > > > Exponent width: 5 bits
> > > > > > Significand precision: 11 bits (10 explicitly stored)
> > > > > >
> > > > > > Bfloat16 has the following format:
> > > > > > Sign bit: 1 bit
> > > > > > Exponent width: 8 bits
> > > > > > Significand precision: 8 bits (7 explicitly stored), as opposed to 
> > > > > > 24
> > > > > > bits in a classical single-precision floating-point format
> > > > >
> > > > > > This is why I proposed to introduce HBmode (and corresponding
> > > > > > bfloat16_format) to distinguish between IEEE HFmode and BFmode.
> > > > >
> > > >
> > > > Unless there is BF16 language level support,  HBmode has no advantage
> > > > over HImode.   We can add HBmode when we gain BF16 language support.
> > > >
> > > > --
> > > > H.J.
> > >
> > > Any other comments?  If not, I'll merge this to trunk.
> >
> > It is not a regression, so please no.
>
> Ehm, "regression fix" ...
>
> Uros.

Updated patch.

-- 
BR,
Hongtao
Index: gcc/common/config/i386/i386-common.c
===
--- gcc/common/config/i386/i386-common.c	(revision 270934)
+++ gcc/common/config/i386/i386-common.c	(working copy)
@@ -88,6 +88,7 @@
   (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
 #define OPTION_MASK_ISA_AVX512BITALG_SET \
   (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16
 #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
 #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
 #define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@@ -215,6 +216,7 @@
 #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
 #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
 #define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
+#define OPTION_MASK_ISA_AVX512BF16_UNSET OPTION_MASK_ISA_AVX512BF16
 #define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
 #define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
 #define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@@ -276,10 +278,14 @@
| OPTION_MASK_ISA_SSE_UNSET)
 
 #define OPTION_MASK_ISA2_AVX512F_UNSET \
-  (OPTION_MASK_ISA_AVX5124FMAPS_UNSET | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
+  (OPTION_MASK_ISA_AVX512BF16_UNSET \
+   | OPTION_MASK_ISA_AVX5124FMAPS_UNSET \
+   | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
 #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
   (OPTION_MASK_ISA2_AVX512F_UNSET)
 
+#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BF16_UNSET
+
 /* Set 1 << value as value of -malign-FLAG option.  */
 
 static void
@@ -738,6 +744,21 @@
 	}
   return true;
 
+case OPT_mavx512bf16:
+  if (value)
+	{
+	  opts->x_ix86_isa_flags2 |=