Go patch committed: Fix quoting in error message

2019-09-16 Thread Ian Lance Taylor
This patch to the Go frontend fixes the quoting of //go:linkname in an
error message.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
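
For context, GCC diagnostics quote tokens with %< and %>; the fix moves
the leading "//" inside the quoted region so the whole directive is
quoted as one token.  A sketch of the corrected call, reconstructed from
the description above:

  go_error_at (loc,
               ("%<//go:linkname%> missing external name "
                "for declaration of %s"),
               go_name.c_str ());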
Index: gcc/go/gofrontend/MERGE
===================================================================
--- gcc/go/gofrontend/MERGE (revision 275700)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-5af62eda697da21155091cf5375ed9edb4639b67
+722990deeede7801e4ed3ca5d53ce312a19fcd7a
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/gogo.cc
===================================================================
--- gcc/go/gofrontend/gogo.cc   (revision 275698)
+++ gcc/go/gofrontend/gogo.cc   (working copy)
@@ -2541,7 +2541,7 @@ Gogo::add_linkname(const std::string& go
 {
   if (ext_name.empty())
go_error_at(loc,
-   ("//% missing external name "
+   ("% missing external name "
 "for declaration of %s"),
go_name.c_str());
   else


[PATCH] Improve _Safe_iterator _M_distance_to

2019-09-16 Thread François Dumont
    Here is the patch to improve the _Safe_iterator<>::_M_get_distance_to implementation.


I introduced a new _Distance_precision __dp_sign_max_size for occasions
where we can't find out the exact size of a range but can still get its
maximum size. Thanks to this, the performance tests are much better when
dealing with list iterators:


Normal mode:

copy_backward_deque_iterators.cc    deque 2 list     5616r 5616u    0s    0mem    0pf
copy_backward_deque_iterators.cc    list 2 deque     1586r 1586u    0s    0mem    0pf
copy_deque_iterators.cc             deque 2 list     5495r 5495u    0s    0mem    0pf
copy_deque_iterators.cc             list 2 deque     2400r 2400u    0s    0mem    0pf


Debug mode:

copy_backward_deque_iterators.cc    deque 2 list     5789r 5785u    1s    0mem    0pf
copy_backward_deque_iterators.cc    list 2 deque     1656r 1655u    0s    0mem    0pf
copy_deque_iterators.cc             deque 2 list     5792r 5793u    0s    0mem    0pf
copy_deque_iterators.cc             list 2 deque     2636r 2636u    0s    0mem    0pf


Tested under Linux x86_64.

I'll commit once other patches are in.
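
To illustrate (a simplified sketch, not the exact library code): with
__dp_sign_max_size the recorded distance is an upper bound, which is
still enough to validate the output range and take the unchecked fast
path, as the safe_iterator.h hunk below does:

  // __dist is a (count, precision) pair describing [__first, __last).
  // Any precision above __dp_sign means __dist.first is at least an
  // upper bound on the range size, so the advance check is meaningful.
  if (__dist.second > __gnu_debug::__dp_sign
      && __result._M_can_advance(__dist.first, true))
    return std::__copy_move_a<_IsMove>(__first, __last, __result.base());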


    * include/debug/forward_list
(_Sequence_traits<__debug::forward_list<>>::_S_size): Returns __dp_sign
    distance when not empty.
    * include/debug/list
    (_Sequence_traits<__debug::list<>>::_S_size): Likewise.
    * include/debug/helper_functions.h (__dp_sign_max_size): New
    _Distance_precision enum entry.
    * include/debug/safe_iterator.h
    (__copy_move_a(_II, _II, const _Safe_iterator<>&)): Check for output
    iterator _M_can_advance as soon as input range distance precision is
    strictly higher than __dp_sign.
    (__copy_move_a(const _Safe_iterator<>&, const _Safe_iterator<>&,
    const _Safe_iterator<>&)): Likewise.
    (__copy_move_backward_a(_II, _II, const _Safe_iterator<>&)): Likewise.
    (__copy_move_backward_a(const _Safe_iterator<>&,
    const _Safe_iterator<>&, const _Safe_iterator<>&)): Likewise.
    (__equal_aux(_II, _II, const _Safe_iterator<>&)): Likewise.
    (__equal_aux(const _Safe_iterator<>&,
    const _Safe_iterator<>&, const _Safe_iterator<>&)): Likewise.

François

diff --git a/libstdc++-v3/include/debug/forward_list b/libstdc++-v3/include/debug/forward_list
index e30b09e..f1756ddec9d 100644
--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -911,7 +911,7 @@ namespace __gnu_debug
   _S_size(const std::__debug::forward_list<_Tp, _Alloc>& __seq)
   {
 	return __seq.empty()
-	  ? std::make_pair(0, __dp_exact) : std::make_pair(1, __dp_equality);
+	  ? std::make_pair(0, __dp_exact) : std::make_pair(1, __dp_sign);
   }
 };
 
diff --git a/libstdc++-v3/include/debug/helper_functions.h b/libstdc++-v3/include/debug/helper_functions.h
index b7aeafef12a..9429bb90a55 100644
--- a/libstdc++-v3/include/debug/helper_functions.h
+++ b/libstdc++-v3/include/debug/helper_functions.h
@@ -50,10 +50,11 @@ namespace __gnu_debug
*/
   enum _Distance_precision
 {
-  __dp_none,	// Not even an iterator type
-  __dp_equality,	//< Can compare iterator equality, only
-  __dp_sign,	//< Can determine equality and ordering
-  __dp_exact	//< Can determine distance precisely
+  __dp_none,		// Not even an iterator type
+  __dp_equality,		//< Can compare iterator equality, only
+  __dp_sign,		//< Can determine equality and ordering
+  __dp_sign_max_size,	//< __dp_sign and gives max range size
+  __dp_exact		//< Can determine distance precisely
 };
 
   template= 0;
 	}
diff --git a/libstdc++-v3/include/debug/list b/libstdc++-v3/include/debug/list
index 5eb9a6094e3..140546a633e 100644
--- a/libstdc++-v3/include/debug/list
+++ b/libstdc++-v3/include/debug/list
@@ -916,7 +916,7 @@ namespace __gnu_debug
   _S_size(const std::__debug::list<_Tp, _Alloc>& __seq)
   {
 	return __seq.empty()
-	  ? std::make_pair(0, __dp_exact) : std::make_pair(1, __dp_equality);
+	  ? std::make_pair(0, __dp_exact) : std::make_pair(1, __dp_sign);
   }
 };
 #endif
diff --git a/libstdc++-v3/include/debug/safe_iterator.h b/libstdc++-v3/include/debug/safe_iterator.h
index 1665c95f5df..584f8eec5b0 100644
--- a/libstdc++-v3/include/debug/safe_iterator.h
+++ b/libstdc++-v3/include/debug/safe_iterator.h
@@ -984,7 +984,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __glibcxx_check_valid_range2(__first, __last, __dist);
   __glibcxx_check_can_increment(__result, __dist.first);
 
-  if (__dist.second == ::__gnu_debug::__dp_exact
+  if (__dist.second > ::__gnu_debug::__dp_sign
 	  && __result._M_can_advance(__dist.first, true))
 	return ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>(
 		std::__copy_move_a<_IsMove>(__first, __last, __result.base()),
@@ -1008,7 +1008,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   if (__dist.second > ::__gnu_debug::__dp_equality)
 	{
-	  if (__dist.second == ::__gnu_debug::__dp_exact
+	  if (__dist.second > 

[PATCH] lexicographical_comparison enhancements

2019-09-16 Thread François Dumont

Hi

This is the next part of:

https://gcc.gnu.org/ml/libstdc++/2019-09/msg00048.html

This time it is to improve behavior of std::lexicographical_compare for 
deque or safe iterators.


To do so I had to make the internal lexicographical_compare
implementation return int rather than bool; otherwise it is impossible
to have a chunk-based approach for the deque iterators. I'm surprised
that std::lexicographical_compare returns a bool, by the way; comparisons
usually return int.


Tested under Linux x86_64 normal and debug modes.

Ok to commit ?
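
To illustrate why a three-way result is needed (a standalone sketch, not
the library code): a chunk-wise walk must distinguish "decided by this
chunk" from "equal so far, keep going", which a bool cannot express:

  #include <iostream>

  // Three-way comparison of one chunk, like the internal __lc: <0, 0 or >0.
  static int
  compare_chunk (const int* f1, const int* l1, const int* f2, const int* l2)
  {
    for (; f1 != l1 && f2 != l2; ++f1, ++f2)
      if (*f1 != *f2)
        return *f1 < *f2 ? -1 : 1;
    // A range that is a prefix of the other compares less.
    return (l1 - f1) == (l2 - f2) ? 0 : (f1 == l1 ? -1 : 1);
  }

  int
  main ()
  {
    int a[] = { 1, 2, 3, 4 };
    int b[] = { 1, 2, 3, 5 };
    // Chunked walk over two 2-element chunks: only a non-zero chunk
    // result decides the overall answer.
    int r = compare_chunk (a, a + 2, b, b + 2);        // equal: continue
    if (r == 0)
      r = compare_chunk (a + 2, a + 4, b + 2, b + 4);  // decides: -1
    std::cout << (r < 0) << '\n';                      // prints 1 (a < b)
  }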


    * include/bits/stl_algobase.h
    (__lexicographical_compare_impl): Return int.
    (__lexicographical_compare::__lc): Likewise.
    (__lexicographical_compare_aux1(_II1, _II1, _II2, _II2)): New.
    (__lexicographical_compare_aux1(_Deque_iterator<>, _Deque_iterator<>,
    _II2, _II2)): New.
    (__lexicographical_compare_aux1(_II1, _II1,
    _Deque_iterator<>, _Deque_iterator<>)): New.
    (__lexicographical_compare_aux1(_Deque_iterator<>, _Deque_iterator<>,
    _Deque_iterator<>, _Deque_iterator<>)): New.
    (__lexicographical_compare_aux): Adapt, call later.
    (__lexicographical_compare_aux(_Safe_iterator<>, _Safe_iterator<>,
    _II2, _II2)): New.
    (__lexicographical_compare_aux(_II1, _II1,
    _Safe_iterator<>, _Safe_iterator<>)): New.
    (__lexicographical_compare_aux(_Safe_iterator<>, _Safe_iterator<>,
    _Safe_iterator<>, _Safe_iterator<>)): New.
    (std::lexicographical_compare): Adapt, call later.
    * include/bits/stl_deque.h (__lexicographical_compare_aux1): New
    declarations.
    * include/bits/deque.tcc (__lex_cmp_dit): New.
    (__lexicographical_compare_aux1): New definitions.
    * include/debug/safe_iterator.h (__lexicographical_compare_aux): New.
    * testsuite/25_algorithms/lexicographical_compare/1.cc (test6, test7):
    New.
    * testsuite/25_algorithms/lexicographical_compare/deque_iterators/1.cc:
    New.

François

diff --git a/libstdc++-v3/include/bits/deque.tcc b/libstdc++-v3/include/bits/deque.tcc
index ab996ca52fa..db8f1b03eec 100644
--- a/libstdc++-v3/include/bits/deque.tcc
+++ b/libstdc++-v3/include/bits/deque.tcc
@@ -1208,6 +1208,98 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   return true;
 }
 
+  template<typename _Tp, typename _Ref, typename _Ptr, typename _II>
+int
+__lex_cmp_dit(
+	const _GLIBCXX_STD_C::_Deque_iterator<_Tp, _Ref, _Ptr>& __first1,
+	const _GLIBCXX_STD_C::_Deque_iterator<_Tp, _Ref, _Ptr>& __last1,
+	_II __first2, _II __last2)
+{
+  typedef _GLIBCXX_STD_C::_Deque_iterator<_Tp, _Ref, _Ptr> _Iter;
+  typedef typename _Iter::difference_type difference_type;
+
+  if (__first1._M_node != __last1._M_node)
+	{
+	  difference_type __len = __last2 - __first2;
+	  difference_type __flen
+	= std::min(__len, __first1._M_last - __first1._M_cur);
+	  if (int __ret = std::__lexicographical_compare_aux1(
+	  __first1._M_cur, __first1._M_last, __first2, __first2 + __flen))
+	return __ret;
+
+	  __first2 += __flen;
+	  __len -= __flen;
+	  __flen = std::min(__len, _Iter::_S_buffer_size());
+	  for (typename _Iter::_Map_pointer __node = __first1._M_node + 1;
+	   __node != __last1._M_node;
+	   __first2 += __flen, __len -= __flen,
+	   __flen = std::min(__len, _Iter::_S_buffer_size()),
+	   ++__node)
+	if (int __ret = std::__lexicographical_compare_aux1(
+		  *__node, *__node + _Iter::_S_buffer_size(),
+		  __first2, __first2 + __flen))
+	  return __ret;
+
+	  return std::__lexicographical_compare_aux1(
+	__last1._M_first, __last1._M_cur, __first2, __last2);
+	}
+
+  return std::__lexicographical_compare_aux1(
+	  __first1._M_cur, __last1._M_cur, __first2, __last2);
+}
+
+  template<typename _Tp1, typename _Ref1, typename _Ptr1, typename _II2>
+typename __gnu_cxx::__enable_if<
+  __is_random_access_iter<_II2>::__value, int>::__type
+__lexicographical_compare_aux1(
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp1, _Ref1, _Ptr1> __first1,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp1, _Ref1, _Ptr1> __last1,
+		_II2 __first2, _II2 __last2)
+{ return std::__lex_cmp_dit(__first1, __last1, __first2, __last2); }
+
+  template<typename _Tp1, typename _Ref1, typename _Ptr1,
+	   typename _Tp2, typename _Ref2, typename _Ptr2>
+int
+__lexicographical_compare_aux1(
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp1, _Ref1, _Ptr1> __first1,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp1, _Ref1, _Ptr1> __last1,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp2, _Ref2, _Ptr2> __first2,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp2, _Ref2, _Ptr2> __last2)
+{ return std::__lex_cmp_dit(__first1, __last1, __first2, __last2); }
+
+  template<typename _II1, typename _Tp2, typename _Ref2, typename _Ptr2>
+typename __gnu_cxx::__enable_if<
+  __is_random_access_iter<_II1>::__value, int>::__type
+__lexicographical_compare_aux1(
+		_II1 __first1, _II1 __last1,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp2, _Ref2, _Ptr2> __first2,
+		_GLIBCXX_STD_C::_Deque_iterator<_Tp2, _Ref2, _Ptr2> __last2)
+{
+  typedef _GLIBCXX_STD_C::_Deque_iterator<_Tp2, _Ref2, _Ptr2> _Iter;
+  typedef typename _Iter::difference_type difference_type;
+
+  difference_type __len = __last1 - __first1;
+  while (__len > 0)
+	{
+	  const difference_type __flen = __first2._M_node == 

[PATCH, i386]: Fix PR91719, emit XCHG for seq_cst store on big cores

2019-09-16 Thread Uros Bizjak
Attached patch emits XCHG instead of store+MFENCE on big cores and
generic tuning

m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC

(The tune can be added for other targets, too.)
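
For reference, the kind of source this affects (a sketch; the exact
assembly depends on the selected tuning):

  #include <atomic>

  std::atomic<long> x;

  void
  seq_cst_store (long v)
  {
    // A sequentially-consistent store.  Previously compiled to
    // "mov; mfence" whenever MFENCE is available; with the new tune
    // flag a single "xchg" is emitted instead, which is implicitly
    // locked and therefore already a full barrier.
    x.store (v, std::memory_order_seq_cst);
  }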

2019-09-16  Uroš Bizjak  

PR target/91719
* config/i386/i386.h (TARGET_USE_XCHG_FOR_ATOMIC_STORE): New macro.
* config/i386/x86-tune.def (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE): New.
* config/i386/sync.md (atomic_store<mode>): Emit XCHG for
TARGET_USE_XCHG_FOR_ATOMIC_STORE.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h  (revision 275752)
+++ config/i386/i386.h  (working copy)
@@ -590,6 +590,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
 #define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_USE_XCHG_FOR_ATOMIC_STORE \
+   ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
 #define TARGET_EMIT_VZEROUPPER \
ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 
Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md (revision 275752)
+++ config/i386/sync.md (working copy)
@@ -306,8 +306,11 @@
 {
   operands[1] = force_reg (<MODE>mode, operands[1]);
 
-  /* For seq-cst stores, when we lack MFENCE, use XCHG.  */
-  if (is_mm_seq_cst (model) && !(TARGET_64BIT || TARGET_SSE2))
+  /* For seq-cst stores, use XCHG
+when we lack MFENCE or when target prefers XCHG.  */
+  if (is_mm_seq_cst (model)
+ && (!(TARGET_64BIT || TARGET_SSE2)
+ || TARGET_USE_XCHG_FOR_ATOMIC_STORE))
{
  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
operands[0], operands[1],
Index: config/i386/x86-tune.def
===================================================================
--- config/i386/x86-tune.def(revision 275752)
+++ config/i386/x86-tune.def(working copy)
@@ -313,6 +313,10 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_
  m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
  | m_GOLDMONT_PLUS | m_TREMONT | m_GENERIC)
 
+/* X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE: Use xchg instead of mov+mfence.  */
+DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
+m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
+
 /*****************************************************************************/
 /* 387 instruction selection tuning                                          */
 /*****************************************************************************/


Re: C++ PATCH for c++/91678 - wrong error with decltype and location wrapper

2019-09-16 Thread Marek Polacek
On Sun, Sep 15, 2019 at 10:18:29AM -0400, Jason Merrill wrote:
> On 9/5/19 9:24 PM, Marek Polacek wrote:
> > They use
> > non_lvalue_loc, but that won't create a NON_LVALUE_EXPR wrapper around a 
> > location
> > wrapper.
> 
> That seems like the bug. maybe_lvalue_p should be true for
> VIEW_CONVERT_EXPR.

That makes sense but it breaks in tsubst_* which doesn't expect a
NON_LVALUE_EXPR wrapped around a location wrapper.  Perhaps we want to handle
it like this.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-09-16  Marek Polacek  

PR c++/91678 - wrong error with decltype and location wrapper.
* pt.c (tsubst_copy): Handle NON_LVALUE_EXPRs wrapped around a location
wrapper. 

* fold-const.c (maybe_lvalue_p): Handle VIEW_CONVERT_EXPR.

* g++.dg/cpp0x/decltype73.C: New test.

diff --git gcc/cp/pt.c gcc/cp/pt.c
index 9de1b8fec97..4bd77ac2578 100644
--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -15786,8 +15786,16 @@ tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl)
  return op;
}
}
- /* We shouldn't see any other uses of these in templates.  */
- gcc_unreachable ();
+ /* We can get a NON_LVALUE_EXPR wrapped around a location wrapper.  */
+ else if (code == NON_LVALUE_EXPR && location_wrapper_p (op))
+   {
+ tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+ op = tsubst_copy (TREE_OPERAND (t, 0), args, complain, in_decl);
+ return build1 (code, type, op);
+   }
+ else
+   /* We shouldn't see any other uses of these in templates.  */
+   gcc_unreachable ();
}
 
 case CAST_EXPR:
diff --git gcc/fold-const.c gcc/fold-const.c
index a99dafec589..6d955a76f87 100644
--- gcc/fold-const.c
+++ gcc/fold-const.c
@@ -2594,6 +2594,7 @@ maybe_lvalue_p (const_tree x)
   case TARGET_EXPR:
   case COND_EXPR:
   case BIND_EXPR:
+  case VIEW_CONVERT_EXPR:
 break;
 
   default:
diff --git gcc/testsuite/g++.dg/cpp0x/decltype73.C gcc/testsuite/g++.dg/cpp0x/decltype73.C
new file mode 100644
index 000..cbe94a898e3
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/decltype73.C
@@ -0,0 +1,4 @@
+// PR c++/91678 - wrong error with decltype and location wrapper.
+// { dg-do compile { target c++11 } }
+
+float* test(float* c) { return (decltype(c + 0))(float*)c; }


Re: [PATCH][PR91749][arm] FDPIC: Handle -mflip-thumb

2019-09-16 Thread Ramana Radhakrishnan
On Mon, Sep 16, 2019 at 2:40 PM Christophe Lyon
 wrote:
>
> [Re-sending in plain text-mode, sorry for the duplicates]
>
> Hi,
>
> In PR91749, we have ICEs because -mflip-thumb switches to Thumb-1 (the
> default target cpu does not support Thumb-2).
>
> Although we already filter this in arm_configure_build_target, we
> forgot to handle cases when the mode is changed via attributes (either
> in the source code, or via -mflip-thumb).
>
> This patch adds the same error message when trying to apply the
> "thumb" attribute and the target does not support Thumb-2 (only if we
> are in FDPIC mode, of course).
>
> OK?

OK.

Ramana
>
> Thanks,
>
> Christophe


-O2 inliner returning 1/n: reduce EARLY_INLINING_INSNS for O1 and O2

2019-09-16 Thread Jan Hubicka
Hi,
as discussed at Cauldron this week, I plan to push out changes enabling
-finline-functions at -O2 with limited parameters, aiming at overall
better performance without large code size increases.

Currently we inline aggressively functions declared inline, we inline
when function size is expected to shrink, and we also do limited
auto-inlining in the early inliner for non-inline functions even if code
grows.  This is handled by PARAM_EARLY_INLINING_INSNS.

This patch tunes it down for -O2 in order to give the real IPA inliner
some room to do its work.  The combined effect of my changes is at:
https://lnt.opensuse.org/db_default/v4/SPEC/latest_runs_report?younger_in_days=14_in_days=0_elf_detail_stats=on_percentage_change=0.001=ddee20190fa78935338bc3161c1b29b8528d82dd%2C9b247ee17d1030b88462531225cc842251507bb6

This involves further forking inline-insns-auto, inline-insns-single and
big-speedup params.

Generally I was able to mostly improve SPEC 2006 and 2017 scores as
follows:

O2 Kabylake
SPEC/SPEC2006/INT/total 0.58%   
SPEC/SPEC2006/FP/total  0.19%   
SPEC/SPEC2017/FP/total  0.45%   
SPEC/SPEC2017/INT/total 0.18%   

O2 LTO Kabylake
SPEC/SPEC2006/INT/total 1.08%   
SPEC/SPEC2006/FP/total  0.60%   

O2 Zen
SPEC/SPEC2006/INT/total 1.64%   
SPEC/SPEC2006/FP/total  0.23%   
SPEC/SPEC2017/INT/total -0.58%  
SPEC/SPEC2017/FP/total  0.52%   

O2 Zen LTO
SPEC/SPEC2006/FP/total  1.40%   
SPEC/SPEC2006/INT/total 1.26%   
SPEC/SPEC2017/INT/total 0.93%   
SPEC/SPEC2017/FP/total  -0.22%  

The SPEC2017 FP score on Zen is affected by a 10% regression on
CactusBSSN that seems to be due to microarchitectural behaviour
depending on code layout rather than any inlining changes in hot parts
of the program.  The other notable regression is omnetpp, which also
shows up on Zen only.  Comparing the Zen and Kabylake results, it seems
the only consistent losers are gcc (3%) and xalancbmk (2.8%), both with
non-LTO only.  I plan to investigate those if the regression persists,
even though it is a bit small and there is no obvious problem in the
backtrace.

Code size improves by 0.67% for SPEC2006 non-LTO and regresses by 1.64%
with LTO.  For 2017 it is a 2.2% improvement and a 2.4% regression,
respectively.

The difference between LTO and non-LTO is mostly due to the fact that
LTO units tend to hit the overall unit growth cap of inlining, since
there are too many inline candidates.  For this reason the patch is not
as effective on Firefox and other really big packages as I would like.
I still plan a number of changes to the inliner this stage1, so this is
not the final situation, but I think it is better to do the change early
so it gets tested on other architectures.  (And that was the consensus
of the Cauldron discussion, by my understanding.)

This patch does not yet enable -finline-functions, so it will
temporarily regress performance (and improve code size).  I am doing
this in incremental steps to get more data on both inliners.

Bootstrapped/regtested x86_64-linux, plan to commit it later today.

Honza

* ipa-inline.c (want_early_inline_function_p): Use
PARAM_EARLY_INLINING_INSNS_O2.
* params.def (PARAM_EARLY_INLINING_INSNS_O2): New.
(PARAM_EARLY_INLINING_INSNS): Update documentation.
* invoke.texi (early-inlining-insns-O2): New.
(early-inlining-insns): Update documentation.
Index: ipa-inline.c
===================================================================
--- ipa-inline.c(revision 275716)
+++ ipa-inline.c(working copy)
@@ -641,6 +641,10 @@ want_early_inline_function_p (struct cgr
 {
   int growth = estimate_edge_growth (e);
   int n;
+  int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3
+? PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)
+: PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_O2);
+
 
   if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
;
@@ -654,26 +658,28 @@ want_early_inline_function_p (struct cgr
 growth);
  want_inline = false;
}
-  else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
+  else if (growth > early_inlining_insns)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
 "  will not early inline: %C->%C, "
-"growth %i exceeds --param early-inlining-insns\n",
-e->caller, callee,
-growth);
+"growth %i exceeds --param 
early-inlining-insns%s\n",
+e->caller, callee, growth,
+opt_for_fn (e->caller->decl, optimize) >= 3
+? "" : "-O2");
  want_inline = false;
}
   else if ((n = num_calls 

[PATCH] * Makefile.in (build/genmatch.o): Depend on $(CPPLIB_H).

2019-09-16 Thread Jason Merrill
So that rebuilding my tree with or without the operator<=> patches properly
rebuilds genmatch.

Tested x86_64-pc-linux-gnu, applying to trunk as obvious.

---
 gcc/Makefile.in | 2 +-
 gcc/ChangeLog   | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 0fe02fb27a1..152df9fa9b3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2783,7 +2783,7 @@ build/genmddump.o : genmddump.c $(RTL_BASE_H) $(BCONFIG_H) $(SYSTEM_H)\
   $(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H)
 build/genmatch.o : genmatch.c $(BCONFIG_H) $(SYSTEM_H) \
   $(CORETYPES_H) errors.h $(HASH_TABLE_H) hash-map.h $(GGC_H) is-a.h \
-  tree.def builtins.def internal-fn.def case-cfn-macros.h
+  tree.def builtins.def internal-fn.def case-cfn-macros.h $(CPPLIB_H)
 build/gencfn-macros.o : gencfn-macros.c $(BCONFIG_H) $(SYSTEM_H)   \
   $(CORETYPES_H) errors.h $(HASH_TABLE_H) hash-set.h builtins.def  \
   internal-fn.def
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1921bc775d6..63fae8615e5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2019-09-16  Jason Merrill  
+
+   * Makefile.in (build/genmatch.o): Depend on $(CPPLIB_H).
+
 2019-09-16  Martin Liska  
 
* gimple-fold.c (or_comparisons_1): Remove rules moved

base-commit: 4bc26ca2c40a29d0f49bad7a8326dbac6f764c35
-- 
2.21.0



Re: [SVE] PR86753

2019-09-16 Thread Prathamesh Kulkarni
On Mon, 9 Sep 2019 at 09:36, Prathamesh Kulkarni
 wrote:
>
> On Mon, 9 Sep 2019 at 16:45, Richard Sandiford
>  wrote:
> >
> > Prathamesh Kulkarni  writes:
> > > With patch, the only following FAIL remains for aarch64-sve.exp:
> > > FAIL: gcc.target/aarch64/sve/cond_unary_2.c -march=armv8.2-a+sve
> > > scan-assembler-times \\tmovprfx\\t 6
> > > which now contains 14.
> > > Should I adjust the test, assuming the change isn't a regression ?
> >
> > Well, it is kind-of a regression, but it really just means that the
> > integer code is now consistent with the floating-point code in having
> > an unnecessary MOVPRFX.  So I think adjusting the count is fine.
> > Presumably any future fix for the existing redundant MOVPRFXs will
> > apply to the new ones as well.
> >
> > The patch looks good to me, just some very minor nits:
> >
> > > @@ -8309,11 +8309,12 @@ vect_double_mask_nunits (tree type)
> > >
> > >  /* Record that a fully-masked version of LOOP_VINFO would need MASKS to
> > > contain a sequence of NVECTORS masks that each control a vector of 
> > > type
> > > -   VECTYPE.  */
> > > +   VECTYPE. SCALAR_MASK if non-null, represents the mask used for 
> > > corresponding
> > > +   load/store stmt.  */
> >
> > Should be two spaces between sentences.  Maybe:
> >
> >VECTYPE.  If SCALAR_MASK is nonnull, the fully-masked loop would AND
> >these vector masks with the vector version of SCALAR_MASK.  */
> >
> > since the mask isn't necessarily for a load or store statement.
> >
> > > [...]
> > > @@ -1879,7 +1879,8 @@ static tree permute_vec_elements (tree, tree, tree, 
> > > stmt_vec_info,
> > > says how the load or store is going to be implemented and GROUP_SIZE
> > > is the number of load or store statements in the containing group.
> > > If the access is a gather load or scatter store, GS_INFO describes
> > > -   its arguments.
> > > +   its arguments. SCALAR_MASK is the scalar mask used for corresponding
> > > +   load or store stmt.
> >
> > Maybe:
> >
> >its arguments.  If the load or store is conditional, SCALAR_MASK is the
> >condition under which it occurs.
> >
> > since SCALAR_MASK can be null here too.
> >
> > > [...]
> > > @@ -9975,6 +9978,31 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> > > gimple_stmt_iterator *gsi,
> > >/* Handle cond expr.  */
> > >for (j = 0; j < ncopies; j++)
> > >  {
> > > +  tree loop_mask = NULL_TREE;
> > > +  bool swap_cond_operands = false;
> > > +
> > > +  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> > > + {
> > > +   scalar_cond_masked_key cond (cond_expr, ncopies);
> > > +   if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> > > + {
> > > +   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > > +   loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, 
> > > j);
> > > + }
> > > +   else
> > > + {
> > > +   cond.code = invert_tree_comparison (cond.code,
> > > +   HONOR_NANS (TREE_TYPE 
> > > (cond.op0)));
> >
> > Long line.  Maybe just split it out into a separate assignment:
> >
> >   bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> >   cond.code = invert_tree_comparison (cond.code, honor_nans);
> >
> > > +   if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> > > + {
> > > +   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> > > +   loop_mask = vect_get_loop_mask (gsi, masks, ncopies, 
> > > vectype, j);
> >
> > Long line here too.
> >
> > > [...]
> > > @@ -10090,6 +10121,26 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> > > gimple_stmt_iterator *gsi,
> > >   }
> > >   }
> > >   }
> > > +
> > > +   if (loop_mask)
> > > + {
> > > +   if (COMPARISON_CLASS_P (vec_compare))
> > > + {
> > > +   tree tmp = make_ssa_name (vec_cmp_type);
> > > +   gassign *g = gimple_build_assign (tmp,
> > > + TREE_CODE (vec_compare),
> > > + TREE_OPERAND (vec_compare, 0),
> > > + TREE_OPERAND (vec_compare, 1));
> >
> > Two long lines.
> >
> > > +   vect_finish_stmt_generation (stmt_info, g, gsi);
> > > +   vec_compare = tmp;
> > > + }
> > > +
> > > +   tree tmp2 = make_ssa_name (vec_cmp_type);
> > > +   gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR, 
> > > vec_compare, loop_mask);
> >
> > Long line here too.
> >
> > > [...]
> > > diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> > > index dc181524744..c4b2d8e8647 100644
> > > --- a/gcc/tree-vectorizer.c
> > > +++ b/gcc/tree-vectorizer.c
> > > @@ -1513,3 +1513,39 @@ make_pass_ipa_increase_alignment (gcc::context 
> > > *ctxt)
> > >  {
> > >

Re: [C++ Patch] Another bunch of location fixes

2019-09-16 Thread Jason Merrill
On Mon, Sep 16, 2019 at 5:22 AM Paolo Carlini  wrote:
>
> On 15/09/19 16:22, Jason Merrill wrote:
> > On 9/12/19 9:41 AM, Paolo Carlini wrote:
> >> +  if (!valid_array_size_p (dname
> >> +   ? declarator->id_loc : input_location,
> >
> > Use the id_loc local variable?
>
> This diagnostic is inside the loop over declarator->declarator.
> Eventually, outside the loop, the id_loc local is updated to the final
> declarator->id_loc or input_location. Normally in the loop we use the
> current declarator->id_loc: what I tested seems more correct to me (we
> have to account for input_location too because valid_array_size_p,
> shared with the C front-end, wants a sound location)

OK.

Jason



Re: Add "fast" conversions from arrays to bitmaps

2019-09-16 Thread Martin Liška
+  static size_t size (const T (&x)[N]) { return N; }


Hello.

This leads to a clang warning:

gcc/array-traits.h:45:33: warning: unused parameter 'x' [-Wunused-parameter]

Can you please fix it?
Thanks,
Martin
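
A minimal sketch of the conventional fix, assuming only the array bound
is needed: leave the array-reference parameter unnamed so there is
nothing for -Wunused-parameter to flag:

  #include <cstddef>

  struct array_traits_sketch
  {
    // The bound N is still deduced; no named parameter, no warning.
    template<typename T, std::size_t N>
      static std::size_t size (const T (&)[N]) { return N; }
  };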


Re: [PATCH 2/2 V3] Use post-dom info to update if/switch predicate (PR ipa/91089)

2019-09-16 Thread Jan Hubicka
> This one is split from original patch.

This patch is also OK (modulo the changelog entry and testing).  Please
wait a bit over a day after committing the first patch before committing
this one, so the periodic benchmarks pick up each change separately.

Thanks,
Honza
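
For reference, the CFG shape this targets (a hypothetical source
example): the join block of a diamond post-dominates the condition
block, so its predicate can simply reuse the condition block's predicate
instead of OR-combining both edge predicates:

  extern int g (int);

  int
  f (int param)
  {
    int r;
    if (param > 0)   /* condition block */
      r = g (1);     /* then block */
    else
      r = g (2);     /* else block */
    return r;        /* join block: post-dominates the condition */
  }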
> 
> Feng
> ---
> diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
> index 1bf1806eaf8..5423756d275 100644
> --- a/gcc/ipa-fnsummary.c
> +++ b/gcc/ipa-fnsummary.c
> @@ -1197,8 +1197,14 @@ set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
> ? code : inverted_code);
> /* invert_tree_comparison will return ERROR_MARK on FP
>comparsions that are not EQ/NE instead of returning proper
> -  unordered one.  Be sure it is not confused with NON_CONSTANT.  */
> -   if (this_code != ERROR_MARK)
> +  unordered one.  Be sure it is not confused with NON_CONSTANT.
> +
> +  And if the edge's target is the final block of diamond CFG graph
> +  of this conditional statement, we do not need to compute
> +  predicate for the edge because the final block's predicate must
> +  be at least as that of the first block of the statement.  */
> +   if (this_code != ERROR_MARK
> +   && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
>   {
> predicate p
>   = add_condition (summary, index, size, &aggpos, this_code,
> @@ -1282,6 +1288,14 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
>*(predicate *) e->aux = false;
>  }
>  
> +  e = gimple_switch_edge (cfun, last, 0);
> +  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
> + default case if its target basic block is in convergence point of all
> + switch cases, which can be determined by checking whether it
> + post-dominates the switch statement.  */
> +  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
> +bound_count = INT_MAX;
> +
>n = gimple_switch_num_labels (last);
>for (case_idx = 1; case_idx < n; ++case_idx)
>  {
> @@ -1293,7 +1307,12 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
>min = CASE_LOW (cl);
>max = CASE_HIGH (cl);
>  
> -  if (!max)
> +  /* The case's target basic block is in convergence point of all switch
> +  cases, its predicate should be at least as that of the switch
> +  statement.  */
> +  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
> + p = true;
> +  else if (!max)
>   p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
>  unshare_expr_without_location (min));
>else
> @@ -1463,10 +1482,10 @@ compute_bb_predicates (struct ipa_func_body_info *fbi,
>   break;
>   }
>   }
> -   if (p == false)
> - gcc_checking_assert (!bb->aux);
> -   else
> +   if (p != false)
>   {
> +   basic_block pdom_bb;
> +
> if (!bb->aux)
>   {
> done = false;
> @@ -1485,6 +1504,34 @@ compute_bb_predicates (struct ipa_func_body_info *fbi,
> *((predicate *) bb->aux) = p;
>   }
>   }
> +
> +   /* For switch/if statement, we can OR-combine predicates of all
> +  its cases/branches to get predicate for basic block in their
> +  convergence point, but sometimes this will generate very
> +  complicated predicate.  Actually, we can get simplified
> +  predicate in another way by using the fact that predicate
> +  for a basic block must also hold true for its post dominators.
> +  To be specific, basic block in convergence point of
> +  conditional statement should include predicate of the
> +  statement.  */
> +   pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
> +   if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
> + ;
> +   else if (!pdom_bb->aux)
> + {
> +   done = false;
> +   pdom_bb->aux = edge_predicate_pool.allocate ();
> +   *((predicate *) pdom_bb->aux) = p;
> + }
> +   else if (p != *(predicate *) pdom_bb->aux)
> + {
> +   p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
> +   if (p != *(predicate *) pdom_bb->aux)
> + {
> +   done = false;
> +   *((predicate *) pdom_bb->aux) = p;
> + }
> + }
>   }
>   }
>  }
> @@ -2089,6 +2136,7 @@ analyze_function_body (struct cgraph_node *node, bool early)
>if (opt_for_fn (node->decl, optimize))
>  {
>calculate_dominance_info (CDI_DOMINATORS);
> +  calculate_dominance_info (CDI_POST_DOMINATORS);
>if (!early)
>  loop_optimizer_init (LOOPS_NORMAL | 

Re: [PATCH 1/2 V3] Setup predicate for switch default case in IPA (PR ipa/91089)

2019-09-16 Thread Jan Hubicka
Hi,
patch is OK with change below provided that you add a ChangeLog entry
(it is usual to write those into the emails) and that it passes
bootstrap (it is also usual to indicate this).

Do you have rights to the svn repository and copyright assignment done?

>  
> +DEFPARAM (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS,
> +   "ipa-max-switch-predicate-bounds",
> +   "Maximal number of boundary endpoints of case ranges of switch "
> +   "statement.  For switch exceeding this limit, do not construct cost-"
> +   "evaluating predicate for default case during IPA function summary"
> +   "generation.",

I think those texts are intended to not be too long. I think the first
sentence extended by " used during IPA function summary generation" is
good enough.

Thank you,
Honza


Re: [PATCH 5/5] Rewrite second part of or_comparisons_1 into match.pd.

2019-09-16 Thread Martin Liška

On 9/16/19 5:07 AM, Richard Biener wrote:

the :c on both code1 and code2 are not necessary since the constant
is always second via canonicalization.


Fine, fixed.

Btw. I've just installed the whole patchset.

Martin


Re: [PATCH 3/5] Rewrite part of and_comparisons_1 into match.pd.

2019-09-16 Thread Martin Liška

On 9/16/19 5:04 AM, Richard Biener wrote:

On Wed, 11 Sep 2019, Martin Liška wrote:


On 9/11/19 3:19 PM, Martin Liška wrote:

On 9/11/19 2:43 PM, Richard Biener wrote:

Any particular reason you needed to swap the calls in
maybe_fold_and/or_comparisons?


You didn't answer this, besides that the patch is OK.


Ah, sorry.

No, there's not any particular reason. My motivation was that I moved
the patterns from the beginning of and_comparisons_1 to match.pd.
So I wanted to begin with maybe_fold_comparisons_from_match_pd.

I'll put it back to the original order.

Martin



Thanks,
Richard.





[PATCH][PR91749][arm] FDPIC: Handle -mflip-thumb

2019-09-16 Thread Christophe Lyon
[Re-sending in plain text-mode, sorry for the duplicates]

Hi,

In PR91749, we have ICEs because -mflip-thumb switches to Thumb-1 (the
default target cpu does not support Thumb-2).

Although we already filter this in arm_configure_build_target, we
forgot to handle cases when the mode is changed via attributes (either
in the source code, or via -mflip-thumb).

This patch adds the same error message when trying to apply the
"thumb" attribute and the target does not support Thumb-2 (only if we
are in FDPIC mode, of course).

OK?

Thanks,

Christophe
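
For illustration, the kind of construct that now gets a clean diagnostic
instead of an ICE (a sketch, assuming an FDPIC toolchain whose default
target only supports Thumb-1):

  /* With the patch this emits
     "sorry: FDPIC mode is not supported in Thumb-1 mode"
     rather than crashing.  */
  __attribute__ ((target ("thumb")))
  int f (int x) { return x + 1; }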
gcc/ChangeLog:

2019-09-16  Christophe Lyon  

PR target/91749
* config/arm/arm.c (arm_valid_target_attribute_rec): Make sure the
mode attribute is supported by FDPIC.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c452771..ceabe0a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -31175,7 +31175,11 @@ arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
 {
   argstr = NULL;
   if (!strcmp (q, "thumb"))
-   opts->x_target_flags |= MASK_THUMB;
+   {
+ opts->x_target_flags |= MASK_THUMB;
+ if (TARGET_FDPIC && !arm_arch_thumb2)
+   sorry ("FDPIC mode is not supported in Thumb-1 mode");
+   }
 
   else if (!strcmp (q, "arm"))
opts->x_target_flags &= ~MASK_THUMB;


Re: [PATCH][ARM] Enable code hoisting with -Os (PR80155)

2019-09-16 Thread Richard Biener
On Mon, Sep 16, 2019 at 2:59 PM Wilco Dijkstra  wrote:
>
> Hi Prathamesh,
>
> > My only concern with the patch is that the issue isn't specific to
> > code-hoisting.
> > For this particular case (reproducible with pr77445-2.c), disabling
> > jump threading
> > doesn't cause the register spill with hoisting enabled.
> > Likewise disabling forwprop3 and forwprop4 prevents the spill.
> > The last time I tried, setting
> > max-jump-thread-duplication-stmts to 20 and
> > fsm-scale-path-stmts to 3, not only removed the spill but also
> > resulted in 9 more hoistings,
> > but regressed some other test on jump threading.
> > So I was wondering whether we should look into fine-tuning relevant params,
> > instead of disabling code hoisting entirely (we will end up regressing
> > some test cases either way) ?
>
> The point is that -fcode-hoisting is the optimization that made some 
> benchmarks
> run significantly slower compared to older GCC versions. It's been more than 
> 2.5
> years without progress, so let's just fix it now in the obvious way.

The issue with the bugzilla is that it lacked appropriate testcase(s) and thus
it is now a mess.  There are clear testcases (maybe not in the benchmarks you
care about) that benefit from code hoisting as enabler, mainly when control
flow can be then converted to data flow.  Also note that "size optimizations"
are important for all cases where followup transforms have size limits on the IL
in place.

Richard.

> Yes, there are likely better solutions. And if someone writes an acceptable 
> patch
> then we can easily enable hoisting again. Surely it is far better to have a 
> fixed
> compiler *now* rather than wait a few more years for a solution that has the
> same effect but which might not be trivially backportable?
>
> Cheers,
> Wilco
>


Re: [PATCH][ARM] Enable code hoisting with -Os (PR80155)

2019-09-16 Thread Wilco Dijkstra
Hi Prathamesh,

> My only concern with the patch is that the issue isn't specific to
> code-hoisting.
> For this particular case (reproducible with pr77445-2.c), disabling
> jump threading
> doesn't cause the register spill with hoisting enabled.
> Likewise disabling forwprop3 and forwprop4 prevents the spill.
> The last time I tried, setting
> max-jump-thread-duplication-stmts to 20 and
> fsm-scale-path-stmts to 3, not only removed the spill but also
> resulted in 9 more hoistings,
> but regressed some other test on jump threading.
> So I was wondering whether we should look into fine-tuning relevant params,
> instead of disabling code hoisting entirely (we will end up regressing
> some test cases either way) ?

The point is that -fcode-hoisting is the optimization that made some benchmarks
run significantly slower compared to older GCC versions. It's been more than 2.5
years without progress, so let's just fix it now in the obvious way.

Yes, there are likely better solutions. And if someone writes an acceptable 
patch
then we can easily enable hoisting again. Surely it is far better to have a 
fixed
compiler *now* rather than wait a few more years for a solution that has the
same effect but which might not be trivially backportable?

Cheers,
Wilco



Re: [ARM/FDPIC v6 20/24] [ARM][testsuite] FDPIC: Skip tests using architectures unsupported by FDPIC

2019-09-16 Thread Christophe Lyon
Hi Kyrill,

I didn't commit this patch yet: are you OK with it?

Thanks,

Christophe

On Mon, 9 Sep 2019 at 17:52, Christophe Lyon  wrote:

> From: Christophe Lyon 
>
> Since FDPIC currently supports arm and thumb-2 modes only, these tests
> fail because they enforce an architecture version that doesn't match
> these restrictions.
>
> This patch introduces new values for the arm_arch effective-target
> (v4t_thumb, v5t_thumb, v5te_thumb, v6_thumb, v6k_thumb, v6z_thumb) as
> needed, and adds them to the relevant tests.  In addition, it adds
> v4t_arm, v5t_arm, v5te_arm, v6_arm, v6k_arm and v6z_arm to avoid
> skipping some tests when GCC is configured to generate Thumb code by
> default.
>
> It also adds the corresponding non-thumb effective-target to the tests
> that were missing it.
>
> The existing v4t, v5t, v5te, v6 v6k and v6z effective-targets now force
> -mfloat-abi=softfp since these thumb-1 targets do not support
> hard-float anyway.
>
> Finally, the patch removes the special case to detect the presence of
> -marm in the flags, since it makes atomic_loaddi tests unsupported:
> the flags in question also include -march, so the combination is
> supported, while -marm alone is not if GCC is configured to target an
> M-profile CPU.
>
> 2019-XX-XX  Christophe Lyon  
>
> gcc/testsuite/
> * lib/target-supports.exp
> (check_effective_target_arm_arch_FUNC_ok): Add v4t_arm, v4t_thumb,
> v5t_arm, v5t_thumb, v5te_arm, v5te_thumb, v6_arm, v6_thumb,
> v6k_arm, v6k_thumb, v6z_arm, v6z_thumb.
> Add -mfloat-abi=softfp to v4t, v5t, v5te, v6, v6k, v6z.
> Remove early exit for -marm.
> * gcc.target/arm/armv6-unaligned-load-ice.c: Add arm_arch
> effective-target.
> * gcc.target/arm/attr-unaligned-load-ice.c: Likewise.
> * gcc.target/arm/ftest-armv4-arm.c: Likewise.
> * gcc.target/arm/ftest-armv4t-arm.c: Likewise.
> * gcc.target/arm/ftest-armv4t-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv5t-arm.c: Likewise.
> * gcc.target/arm/ftest-armv5t-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv5te-arm.c: Likewise.
> * gcc.target/arm/ftest-armv5te-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv6-arm.c: Likewise.
> * gcc.target/arm/ftest-armv6-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv6k-arm.c: Likewise.
> * gcc.target/arm/ftest-armv6k-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv6m-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv6t2-arm.c: Likewise.
> * gcc.target/arm/ftest-armv6t2-thumb.c: Likewise.
> * gcc.target/arm/ftest-armv6z-arm.c: Likewise.
> * gcc.target/arm/ftest-armv6z-thumb.c: Likewise.
> * gcc.target/arm/g2.c: Likewise.
> * gcc.target/arm/macro_defs1.c: Likewise.
> * gcc.target/arm/pr59858.c: Likewise.
> * gcc.target/arm/pr65647-2.c: Likewise.
> * gcc.target/arm/pr79058.c: Likewise.
> * gcc.target/arm/pr83712.c: Likewise.
> * gcc.target/arm/pragma_arch_switch_2.c: Likewise.
> * gcc.target/arm/scd42-1.c: Likewise.
> * gcc.target/arm/scd42-2.c: Likewise.
> * gcc.target/arm/scd42-3.c: Likewise.
> * gcc.c-torture/compile/pr82096.c: Fix arm_arch effective-target.
> * gcc.target/arm/attr_arm-err.c: Likewise.
> * gcc.target/arm/di-longlong64-sync-withldrexd.c: Likewise.
>
> Change-Id: I0845b262b241026561cc52a19ff8bb1659675e49
>
> diff --git a/gcc/testsuite/gcc.c-torture/compile/pr82096.c
> b/gcc/testsuite/gcc.c-torture/compile/pr82096.c
> index d144b70..4e695cd 100644
> --- a/gcc/testsuite/gcc.c-torture/compile/pr82096.c
> +++ b/gcc/testsuite/gcc.c-torture/compile/pr82096.c
> @@ -1,4 +1,4 @@
> -/* { dg-require-effective-target arm_arch_v5t_ok { target arm*-*-* } } */
> +/* { dg-require-effective-target arm_arch_v5t_thumb_ok { target arm*-*-*
> } } */
>  /* { dg-skip-if "Do not combine float-abi values" { arm*-*-* } {
> "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */
>  /* { dg-additional-options "-march=armv5t -mthumb -mfloat-abi=soft" {
> target arm*-*-* } } */
>
> diff --git a/gcc/testsuite/gcc.target/arm/armv6-unaligned-load-ice.c
> b/gcc/testsuite/gcc.target/arm/armv6-unaligned-load-ice.c
> index 88528f1..886a012 100644
> --- a/gcc/testsuite/gcc.target/arm/armv6-unaligned-load-ice.c
> +++ b/gcc/testsuite/gcc.target/arm/armv6-unaligned-load-ice.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } {
> "-march=*" } { "-march=armv6k" } } */
>  /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm"
> } { "" } } */
> +/* { dg-require-effective-target arm_arch_v6k_thumb_ok } */
>  /* { dg-options "-mthumb -Os -mfloat-abi=softfp" } */
>  /* { dg-add-options arm_arch_v6k } */
>
> diff --git a/gcc/testsuite/gcc.target/arm/attr-unaligned-load-ice.c
> b/gcc/testsuite/gcc.target/arm/attr-unaligned-load-ice.c
> index e1ed1c1..2eeb522 

Re: [PATCH, AArch64, v3 0/6] LSE atomics out-of-line

2019-09-16 Thread Wilco Dijkstra
Hi Richard,

>> So what is the behaviour when you explicitly select a specific CPU?
>
> Selecting a specific cpu selects the specific architecture that the cpu
> supports, does it not?  Thus the architecture example above still applies.
>
> Unless I don't understand what distinction that you're making?

When you select a CPU the goal is that we optimize and schedule for that
specific microarchitecture. That implies using atomics that work best for
that core rather than outlining them.

>> I'd say that by the time GCC10 is released and used in distros, systems 
>> without
>> LSE atomics would be practically non-existent. So we should favour LSE 
>> atomics
>> by default.
>
> I suppose.  Does it not continue to be true that an a53 is more impacted by 
> the
> branch prediction than an a76?

That's hard to say for sure - the cost of taken branches (3 in just a few 
instructions for
the outlined atomics) might well affect big/wide cores more. Also note 
Cortex-A55
(successor of Cortex-A53) has LSE atomics.

Wilco

[PATCH] Fix PR91756

2019-09-16 Thread Richard Biener


The following makes the fix for PR87132 less constrained so we can use
the recently added facility for VN disambiguation agains the original
ref tree.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.
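
The enum itself is not visible in the truncated diff below; this sketch
of it is an assumption reconstructed from the uses that are visible
(TR_TRANSLATE, TR_DISAMBIGUATE) and from the "only disallow valueization"
behaviour described in the ChangeLog:

  /* How a ref may be treated during the walk: fully translated,
     valueized and disambiguated, or disambiguated only (no
     valueization, as required when walking over backedges).  */
  enum translate_flags
    { TR_TRANSLATE, TR_VALUEIZE_AND_DISAMBIGUATE, TR_DISAMBIGUATE };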

2019-09-16  Richard Biener  

PR tree-optimization/91756
PR tree-optimization/87132
* tree-ssa-alias.h (enum translate_flags): New.
(get_continuation_for_phi): Use it instead of simple bool flag.
(walk_non_aliased_vuses): Likewise.
* tree-ssa-alias.c (maybe_skip_until): Adjust.
(get_continuation_for_phi): When looking across backedges only
disallow valueization.
(walk_non_aliased_vuses): Adjust.
* tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid valueization
if requested.

* gcc.dg/tree-ssa/ssa-fre-81.c: New testcase.

Index: gcc/tree-ssa-alias.c
===================================================================
--- gcc/tree-ssa-alias.c(revision 275746)
+++ gcc/tree-ssa-alias.c(working copy)
@@ -3150,7 +3150,8 @@ static bool
 maybe_skip_until (gimple *phi, tree &target, basic_block target_bb,
  ao_ref *ref, tree vuse, bool tbaa_p, unsigned int &limit,
  bitmap *visited, bool abort_on_visited,
- void *(*translate)(ao_ref *, tree, void *, bool *),
+ void *(*translate)(ao_ref *, tree, void *, translate_flags *),
+ translate_flags disambiguate_only,
  void *data)
 {
   basic_block bb = gimple_bb (phi);
@@ -3185,7 +3186,7 @@ maybe_skip_until (gimple *phi, tree 
return !abort_on_visited;
  vuse = get_continuation_for_phi (def_stmt, ref, tbaa_p, limit,
   visited, abort_on_visited,
-  translate, data);
+  translate, data, disambiguate_only);
  if (!vuse)
return false;
  continue;
@@ -3200,9 +3201,9 @@ maybe_skip_until (gimple *phi, tree 
  --limit;
  if (stmt_may_clobber_ref_p_1 (def_stmt, ref, tbaa_p))
{
- bool disambiguate_only = true;
+ translate_flags tf = disambiguate_only;
  if (translate
- && (*translate) (ref, vuse, data, &disambiguate_only) == NULL)
+ && (*translate) (ref, vuse, data, &tf) == NULL)
;
  else
return false;
@@ -3233,8 +3234,10 @@ tree
 get_continuation_for_phi (gimple *phi, ao_ref *ref, bool tbaa_p,
  unsigned int &limit, bitmap *visited,
  bool abort_on_visited,
- void *(*translate)(ao_ref *, tree, void *, bool *),
- void *data)
+ void *(*translate)(ao_ref *, tree, void *,
+translate_flags *),
+ void *data,
+ translate_flags disambiguate_only)
 {
   unsigned nargs = gimple_phi_num_args (phi);
 
@@ -3276,13 +3279,15 @@ get_continuation_for_phi (gimple *phi, a
   else if (! maybe_skip_until (phi, arg0, dom, ref, arg1, tbaa_p,
   limit, visited,
   abort_on_visited,
-  /* Do not translate when walking over
+  translate,
+  /* Do not valueize when walking over
  backedges.  */
   dominated_by_p
 (CDI_DOMINATORS,
  gimple_bb (SSA_NAME_DEF_STMT (arg1)),
  phi_bb)
-  ? NULL : translate, data))
+  ? TR_DISAMBIGUATE
+  : disambiguate_only, data))
return NULL_TREE;
 }
 
@@ -3320,7 +3325,8 @@ get_continuation_for_phi (gimple *phi, a
 void *
 walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p,
void *(*walker)(ao_ref *, tree, void *),
-   void *(*translate)(ao_ref *, tree, void *, bool *),
+   void *(*translate)(ao_ref *, tree, void *,
+  translate_flags *),
tree (*valueize)(tree),
unsigned &limit, void *data)
 {
@@ -3373,7 +3379,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
{
  if (!translate)
break;
- bool disambiguate_only = false;
+ translate_flags disambiguate_only = TR_TRANSLATE;
  res = (*translate) (ref, vuse, data, &disambiguate_only);
  /* Failed lookup and translation.  */
  if (res == (void *)-1)
@@ -3385,7 +3391,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
  else if (res != NULL)
 

[PATCH 2/2 V3] Use post-dom info to update if/switch predicate (PR ipa/91089)

2019-09-16 Thread Feng Xue OS
This one is split from original patch.

Feng
---
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 1bf1806eaf8..5423756d275 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -1197,8 +1197,14 @@ set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
  ? code : inverted_code);
  /* invert_tree_comparison will return ERROR_MARK on FP
 comparsions that are not EQ/NE instead of returning proper
-unordered one.  Be sure it is not confused with NON_CONSTANT.  */
- if (this_code != ERROR_MARK)
+unordered one.  Be sure it is not confused with NON_CONSTANT.
+
+And if the edge's target is the final block of diamond CFG graph
+of this conditional statement, we do not need to compute
+predicate for the edge because the final block's predicate must
+be at least as that of the first block of the statement.  */
+ if (this_code != ERROR_MARK
+ && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
{
  predicate p
= add_condition (summary, index, size, &aggpos, this_code,
@@ -1282,6 +1288,14 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   *(predicate *) e->aux = false;
 }
 
+  e = gimple_switch_edge (cfun, last, 0);
+  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
+ default case if its target basic block is in convergence point of all
+ switch cases, which can be determined by checking whether it
+ post-dominates the switch statement.  */
+  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
+bound_count = INT_MAX;
+
   n = gimple_switch_num_labels (last);
   for (case_idx = 1; case_idx < n; ++case_idx)
 {
@@ -1293,7 +1307,12 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   min = CASE_LOW (cl);
   max = CASE_HIGH (cl);
 
-  if (!max)
+  /* The case's target basic block is in convergence point of all switch
+cases, its predicate should be at least as that of the switch
+statement.  */
+  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
+   p = true;
+  else if (!max)
p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
   unshare_expr_without_location (min));
   else
@@ -1463,10 +1482,10 @@ compute_bb_predicates (struct ipa_func_body_info *fbi,
break;
}
}
- if (p == false)
-   gcc_checking_assert (!bb->aux);
- else
+ if (p != false)
{
+ basic_block pdom_bb;
+
  if (!bb->aux)
{
  done = false;
@@ -1485,6 +1504,34 @@ compute_bb_predicates (struct ipa_func_body_info *fbi,
  *((predicate *) bb->aux) = p;
}
}
+
+ /* For switch/if statement, we can OR-combine predicates of all
+its cases/branches to get predicate for basic block in their
+convergence point, but sometimes this will generate very
+complicated predicate.  Actually, we can get simplified
+predicate in another way by using the fact that predicate
+for a basic block must also hold true for its post dominators.
+To be specific, basic block in convergence point of
+conditional statement should include predicate of the
+statement.  */
+ pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
+ if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
+   ;
+ else if (!pdom_bb->aux)
+   {
+ done = false;
+ pdom_bb->aux = edge_predicate_pool.allocate ();
+ *((predicate *) pdom_bb->aux) = p;
+   }
+ else if (p != *(predicate *) pdom_bb->aux)
+   {
+ p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
+ if (p != *(predicate *) pdom_bb->aux)
+   {
+ done = false;
+ *((predicate *) pdom_bb->aux) = p;
+   }
+   }
}
}
 }
@@ -2089,6 +2136,7 @@ analyze_function_body (struct cgraph_node *node, bool early)
   if (opt_for_fn (node->decl, optimize))
 {
   calculate_dominance_info (CDI_DOMINATORS);
+  calculate_dominance_info (CDI_POST_DOMINATORS);
   if (!early)
 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
   else
@@ -2469,6 +2517,7 @@ analyze_function_body (struct cgraph_node *node, bool early)
   else if (!ipa_edge_args_sum)
ipa_free_all_node_params ();
   free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
 }
   if (dump_file)
 

[PATCH 1/2 V3] Setup predicate for switch default case in IPA (PR ipa/91089)

2019-09-16 Thread Feng Xue OS
>> +   if (this_code != ERROR_MARK
>> +   && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
>>   {
>> predicate p
>>   = add_condition (summary, index, size, , this_code,

> So this change is handling the diamond conditional you describe above?
> This is bit of hack since it leaves the edge predicate unnecesarily
> conservative though I see it saves some conditions to be inserted and
> makes things to go smoother.
>
> Please add a comment that explain this and reffers to the other places
> where we do this (in the switch handling below).
Done.

> I believe the case ranges always has to be INTEGER_CST. In this case all
> this can be written using wide ints and produce a lot better code
> (avoiding need to build & lookup & share all temporary tree codes).
> You can take a look at the tree-switch-conversion code which does quite
> some of this wide int handling.
> 
> Richard may have an opinion on this.
Done.

> So this basically  the idea is to or BB's predicate to the
> post-dominator predicate.  This seems safe to do (we need to be careful
> so that the dataflow still converges), but I would preffer to get this
> in separately possibly with a testcase that shows an improvement in the
> dataflow answer.
I've split the patch to two.

Thanks for your comments.
Feng
---
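
As an illustration of what the new predicate models (a hypothetical
example): the default label's predicate is "param lies outside all
listed case ranges", and ipa-max-switch-predicate-bounds caps how many
range endpoints are tracked while building it:

  extern int heavy_computation (int);

  int
  f (int param)
  {
    switch (param)
      {
      case 0 ... 9:     return 1;   /* GNU case-range extension */
      case 100 ... 200: return 2;
      default:          return heavy_computation (param);
      }
  }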
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1391a562c35..7f312c96f37 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11960,6 +11960,12 @@ not spend too much time analyzing huge functions, it gives up and
 consider all memory clobbered after examining
 @option{ipa-max-aa-steps} statements modifying memory.
 
+@item ipa-max-switch-predicate-bounds
+Maximal number of boundary endpoints of case ranges of switch statement.
+For switch exceeding this limit, IPA-CP will not construct cloning cost
+predicate, which is used to estimate cloning benefit, for default case
+of the switch statement.
+
 @item lto-partitions
 Specify desired number of partitions produced during WHOPR compilation.
 The number of partitions should exceed the number of CPUs used for compilation.
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 278bf606661..1bf1806eaf8 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -1269,13 +1269,21 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
 return;
 
+  auto_vec<std::pair<tree, tree> > ranges;
+  tree type = TREE_TYPE (op);
+  int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS);
+  int bound_count = 0;
+  wide_int vr_wmin, vr_wmax;
+  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
+
   FOR_EACH_EDGE (e, ei, bb->succs)
 {
   e->aux = edge_predicate_pool.allocate ();
   *(predicate *) e->aux = false;
 }
+
   n = gimple_switch_num_labels (last);
-  for (case_idx = 0; case_idx < n; ++case_idx)
+  for (case_idx = 1; case_idx < n; ++case_idx)
 {
   tree cl = gimple_switch_label (last, case_idx);
   tree min, max;
@@ -1285,12 +1293,7 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   min = CASE_LOW (cl);
   max = CASE_HIGH (cl);
 
-  /* For default we might want to construct predicate that none
- of cases is met, but it is bit hard to do not having negations
- of conditionals handy.  */
-  if (!min && !max)
-   p = true;
-  else if (!max)
+  if (!max)
p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
   unshare_expr_without_location (min));
   else
@@ -1304,7 +1307,113 @@ set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
}
   *(class predicate *) e->aux
= p.or_with (summary->conds, *(class predicate *) e->aux);
+
+  /* If there are too many disjoint case ranges, predicate for default
+case might become too complicated.  So add a limit here.  */
+  if (bound_count > bound_limit)
+   continue;
+
+  bool new_range = true;
+
+  if (!ranges.is_empty ())
+   {
+ wide_int curr_wmin = wi::to_wide (min);
+ wide_int last_wmax = wi::to_wide (ranges.last ().second);
+
+ /* Merge case ranges if they are continuous.  */
+ if (curr_wmin == last_wmax + 1)
+   new_range = false;
+ else if (vr_type == VR_ANTI_RANGE)
+   {
+ /* If two disjoint case ranges can be connected by anti-range
+of switch index, combine them to one range.  */
+ if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
+   vr_type = VR_UNDEFINED;
+ else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
+   new_range = false;
+   }
+   }
+
+  if (!max)
+   max = min;
+
+  /* Create/extend a case range.  And we count endpoints of range set,
+this number nearly equals to number of conditions that we will create
+   

Re: [C++ Patch] Another bunch of location fixes

2019-09-16 Thread Paolo Carlini

Hi,

On 15/09/19 16:22, Jason Merrill wrote:

On 9/12/19 9:41 AM, Paolo Carlini wrote:

+  if (!valid_array_size_p (dname
+   ? declarator->id_loc : input_location,


Use the id_loc local variable?


This diagnostic is inside the loop over declarator->declarator.
Eventually, outside the loop, the id_loc local is updated to the final
declarator->id_loc or input_location. Normally in the loop we use the
current declarator->id_loc: what I tested seems more correct to me (we
have to account for input_location too because valid_array_size_p,
shared with the C front end, wants a sound location).


Paolo.



Re: [RFA] [tree-optimization/80576] Handle non-constant sizes in DSE

2019-09-16 Thread Richard Biener
On Mon, Sep 16, 2019 at 11:12 AM Richard Biener wrote:
>
> On Mon, Sep 9, 2019 at 10:10 PM Jeff Law  wrote:
> >
> > On 8/26/19 3:00 AM, Richard Biener wrote:
> > > On Fri, Aug 23, 2019 at 9:19 PM Jeff Law  wrote:
> > >>
> > >> On 8/22/19 4:46 AM, Richard Biener wrote:
> > > Also you seem to use this info to constrain optimization when you
> > > might remember that types of addresses do not carry such 
> > > information...
> > > Thus it should be "trivially" possible to write a testcase that is 
> > > miscompiled
> > > after your patch.  I also don't see this really exercised in the
> > > testcases you add?
> >  Arggh.  You're absolutely correct.  I must be blocking out that entire
> >  discussion from last summer due to the trauma :-)
> > 
> >  If the destination is the address of a _DECL node, can we use the size
> >  of the _DECL?
> > >>>
> > >>> Yes, but this should already happen for both invariant ones like &a
> > >>> and variant ones like &a[i].c in ao_ref_init_from_ptr_and_size.
> > >> I don't see that in ao_ref_init_from_ptr_and_size.  AFAICT if you don't
> > >> know the size when you call that routine (size == NULL), then you end up
> > >> with the ref->size and ref->max_size set to -1.
> > >>
> > >> Am I missing something here?
> > >
> > > Ah, of course.  ao_ref_from_ptr_and_size would need to be extended
> > > to constrain max_size.  So what I was
> > > saying is that ao_ref_init_from_ptr_and_size should get you
> > > a DECL ao_ref_base () from which you could constrain max_size with.
> > > Or rather ao_ref_from_ptr_and_size should be extended to do that,
> > > mimicking what get_ref_base_and_extent does at the end in the
> > > if (DECL_P (exp)) case (mind flag_unconstrained_commons!).
> > So I was going to use get_ref_base_and_extent from within
> > ao_ref_init_from_ptr_and_size to capture these cases, but
> > get_ref_base_and_extent internally uses TYPE_SIZE to get the maximum
> > size of the referenced object.
> >
> > That likely represents a codegen bug waiting to happen.
>
> Yeah, you can't use get_ref_base_and_extent literally here.
>
> > I'll see if I can refactor just the bits we want so that we're not
> > duplicating anything.
>
> Not sure if that's too important.  But yes, splitting out
>
>   if (DECL_P (exp))
> {
>   if (VAR_P (exp)
>   && ((flag_unconstrained_commons && DECL_COMMON (exp))
>   || (DECL_EXTERNAL (exp) && seen_variable_array_ref)))
> {
>   tree sz_tree = TYPE_SIZE (TREE_TYPE (exp));
>   /* If size is unknown, or we have read to the end, assume there
>  may be more to the structure than we are told.  */
>   if (TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE
>   || (seen_variable_array_ref
>   && (sz_tree == NULL_TREE
>   || !poly_int_tree_p (sz_tree)
>   || maybe_eq (bit_offset + maxsize,
>wi::to_poly_offset (sz_tree)))))
> maxsize = -1;
> }
>   /* If maxsize is unknown adjust it according to the size of the
>  base decl.  */
>   else if (!known_size_p (maxsize)
>&& DECL_SIZE (exp)
>&& poly_int_tree_p (DECL_SIZE (exp)))
> maxsize = wi::to_poly_offset (DECL_SIZE (exp)) - bit_offset;
> }
>   else if (CONSTANT_CLASS_P (exp))
> {
>   /* If maxsize is unknown adjust it according to the size of the
>  base type constant.  */
>   if (!known_size_p (maxsize)
>   && TYPE_SIZE (TREE_TYPE (exp))
>   && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp))))
> maxsize = (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp)))
>- bit_offset);
> }
>
> into a helper with just the computed offset as argument
> (plus maybe that seen_variable_array_ref which is meaningless
> for the address case or rather has to be assumed true(?)) should be possible.

Btw, for ao_ref_init_from_ptr_and_size we really want something like
get_ref_base_and_extent, just not literal.  Given struct { int i; int
b[4]; int j; } a
and &a.b[i], get_addr_base_and_unit_offset currently returns NULL,
but for ao_ref_init_from_ptr_and_size we would like to see 'a' as base
(we're getting that already) plus offset = 32, max_size = 160
(including the 'j' member of the decl).  Note if it's &p->b[i], max_size
needs to be -1 since there may be more after the structure; we can
really only prune max_size based on an actual object size.

But getting ->offset better is of course independent of max_size pruning.
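
A minimal standalone check of that arithmetic (a sketch assuming 4-byte
int; the asserts convert byte offsets to the bit values quoted above):

#include <assert.h>
#include <stddef.h>

struct S { int i; int b[4]; int j; };

int
main (void)
{
  /* offset = 32: an access through &a.b[i] starts right after 'i'.  */
  assert (offsetof (struct S, b) * 8 == 32);
  /* max_size = 160: with a variable index everything from b[0] up to
     and including 'j' may be touched.  */
  assert ((sizeof (struct S) - offsetof (struct S, b)) * 8 == 160);
  return 0;
}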

Richard.


> Richard.
>
> >
> > Jeff


Re: [PING^2][PATCH 0/3] GNAT test suite fixes for build sysroot

2019-09-16 Thread Arnaud Charlet
> > In the course of setting up GCC regression testing for the RISC-V target 
> > I have discovered that the GNAT test suite does not correctly respond to 
> > the test environment settings passed from the test harness in my setup and 
> > consequently no test case works correctly.
> 
>  Ping for:
> 
> 
> 

Assuming good Ada test results with these changes for both
RISC-V and native Linux x86_64, OK for me.

Arno


Re: [RFA] [tree-optimization/80576] Handle non-constant sizes in DSE

2019-09-16 Thread Richard Biener
On Mon, Sep 9, 2019 at 10:10 PM Jeff Law  wrote:
>
> On 8/26/19 3:00 AM, Richard Biener wrote:
> > On Fri, Aug 23, 2019 at 9:19 PM Jeff Law  wrote:
> >>
> >> On 8/22/19 4:46 AM, Richard Biener wrote:
> > Also you seem to use this info to constrain optimization when you
> > might remember that types of addresses do not carry such information...
> > Thus it should be "trivially" possible to write a testcase that is 
> > miscompiled
> > after your patch.  I also don't see this really exercised in the
> > testcases you add?
>  Arggh.  You're absolutely correct.  I must be blocking out that entire
>  discussion from last summer due to the trauma :-)
> 
>  If the destination is the address of a _DECL node, can we use the size
>  of the _DECL?
> >>>
> >>> Yes, but this should already happen for both invariant ones like &a
> >>> and variant ones like &a[i].c in ao_ref_init_from_ptr_and_size.
> >> I don't see that in ao_ref_init_from_ptr_and_size.  AFAICT if you don't
> >> know the size when you call that routine (size == NULL), then you end up
> >> with the ref->size and ref->max_size set to -1.
> >>
> >> Am I missing something here?
> >
> > Ah, of course.  ao_ref_from_ptr_and_size would need to be extended
> > to constrain max_size.  So what I was
> > saying is that ao_ref_init_from_ptr_and_size should get you
> > a DECL ao_ref_base () from which you could constrain max_size with.
> > Or rather ao_ref_from_ptr_and_size should be extended to do that,
> > mimicking what get_ref_base_and_extent does at the end in the
> > if (DECL_P (exp)) case (mind flag_unconstrained_commons!).
> So I was going to use get_ref_base_and_extent from within
> ao_ref_init_from_ptr_and_size to capture these cases, but
> get_ref_base_and_extent internally uses TYPE_SIZE to get the maximum
> size of the referenced object.
>
> That likely represents a codegen bug waiting to happen.

Yeah, you can't use get_ref_base_and_extent literally here.

> I'll see if I can refactor just the bits we want so that we're not
> duplicating anything.

Not sure if that's too important.  But yes, splitting out

  if (DECL_P (exp))
{
  if (VAR_P (exp)
  && ((flag_unconstrained_commons && DECL_COMMON (exp))
  || (DECL_EXTERNAL (exp) && seen_variable_array_ref)))
{
  tree sz_tree = TYPE_SIZE (TREE_TYPE (exp));
  /* If size is unknown, or we have read to the end, assume there
 may be more to the structure than we are told.  */
  if (TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE
  || (seen_variable_array_ref
  && (sz_tree == NULL_TREE
  || !poly_int_tree_p (sz_tree)
  || maybe_eq (bit_offset + maxsize,
>    wi::to_poly_offset (sz_tree)))))
maxsize = -1;
}
  /* If maxsize is unknown adjust it according to the size of the
 base decl.  */
  else if (!known_size_p (maxsize)
   && DECL_SIZE (exp)
   && poly_int_tree_p (DECL_SIZE (exp)))
maxsize = wi::to_poly_offset (DECL_SIZE (exp)) - bit_offset;
}
  else if (CONSTANT_CLASS_P (exp))
{
  /* If maxsize is unknown adjust it according to the size of the
 base type constant.  */
  if (!known_size_p (maxsize)
  && TYPE_SIZE (TREE_TYPE (exp))
>   && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp))))
maxsize = (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp)))
   - bit_offset);
}

into a helper with just the computed offset as argument
(plus maybe that seen_variable_array_ref which is meaningless
for the address case or rather has to be assumed true(?)) should be possible.

Richard.

>
> Jeff


Re: [PATCH 5/5] Rewrite second part of or_comparisons_1 into match.pd.

2019-09-16 Thread Richard Biener
On Wed, 11 Sep 2019, Martin Liška wrote:

> Hi.
> 
> Updated version of the patch that drops GENERIC
> support in TREE codes.

+  (bit_ior (code1:c@3 @0 INTEGER_CST@1) (code2:c@4 @0 INTEGER_CST@2))

the :c on both code1 and code2 is not necessary since the constant
is always second via canonicalization.
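
The canonicalization being relied on is easy to see at the source level;
in this minimal sketch (not from the patch), both functions reach
match.pd as the same IL, with the constants as second operands:

/* GIMPLE canonicalizes commutative comparisons so the constant is the
   second operand: f1 and f2 both become "a == 3 | a == 5".  */
int f1 (int a) { return (a == 3) | (a == 5); }
int f2 (int a) { return (3 == a) | (5 == a); }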

OK with that removed.

Thanks,
Richard.

Re: [PATCH 3/5] Rewrite part of and_comparisons_1 into match.pd.

2019-09-16 Thread Richard Biener
On Wed, 11 Sep 2019, Martin Liška wrote:

> On 9/11/19 3:19 PM, Martin Liška wrote:
> > On 9/11/19 2:43 PM, Richard Biener wrote:
> >> Any particular reason you needed to swap the calls in
> >> maybe_fold_and/or_comparisons?

You didn't answer this, besides that the patch is OK.

Thanks,
Richard.

Re: [PATCH 4/5] Rewrite first part of or_comparisons_1 into match.pd.

2019-09-16 Thread Richard Biener
On Wed, 11 Sep 2019, Martin Liška wrote:

> Hi.
> 
> Updated version of the patch that drops GENERIC
> support in TREE codes.

OK.

Thanks,
Richard.

Re: [PATCH] Fix up sqrt(x) < c and sqrt(x) >= c match.pd folding (PR tree-optimization/91734)

2019-09-16 Thread Richard Biener
On Sat, 14 Sep 2019, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, the sqrt (x) < c optimization into x < c*c
> sometimes breaks the boundary case: if c2 = c*c is inexact, then in some
> cases we need to optimize it into x <= c*c rather than x < c*c.  The original
> bugreport is when c is small and c2 is 0.0, then obviously we need <= 0.0
> rather than < 0.0, but the testcase includes another example where it makes
> a difference, plus has a >= testcase too.
> 
> Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

I was hoping Joseph might chime in here...  anyway, does this assume
round-to-nearest or does it work with round to +-Inf as well?  I
realize this all is under flag_unsafe_math_optimizations, but
this flag is notoriously underspecified...  So the question is
whether we should disable the transform if c*c isn't exact and
flag_rounding_math?  The transform also doesn't seem to guard
against isnan (c) (-funsafe-math-optimizations sets
-fno-trapping-math and -fno-signed-zeros but not -ffinite-math-only
or disables itself on -frounding-math)

Otherwise the patch looks OK to me.

Thanks,
Richard.


> 2019-09-13  Jakub Jelinek  
> 
>   PR tree-optimization/91734
>   * generic-match-head.c: Include fold-const-call.h.
>   * match.pd (sqrt(x) < c, sqrt(x) >= c): Check the boundary value and
>   in case inexact computation of c*c affects comparison of the boundary,
>   turn LT_EXPR into LE_EXPR or GE_EXPR into GT_EXPR.
> 
>   * gcc.dg/pr91734.c: New test.
> 
> --- gcc/generic-match-head.c.jj   2019-07-20 21:02:09.296821929 +0200
> +++ gcc/generic-match-head.c  2019-09-12 10:52:33.091366624 +0200
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
>  #include "cgraph.h"
>  #include "vec-perm-indices.h"
>  #include "fold-const.h"
> +#include "fold-const-call.h"
>  #include "stor-layout.h"
>  #include "tree-dfa.h"
>  #include "builtins.h"
> --- gcc/match.pd.jj   2019-09-11 21:50:54.933504293 +0200
> +++ gcc/match.pd  2019-09-12 11:12:11.150987786 +0200
> @@ -3541,56 +3541,71 @@ (define_operator_list COND_TERNARY
> if x is negative or NaN.  Due to -funsafe-math-optimizations,
> the results for other x follow from natural arithmetic.  */
> (cmp @0 @1)))
> - (if (cmp == GT_EXPR || cmp == GE_EXPR)
> + (if (cmp == LT_EXPR || cmp == LE_EXPR || cmp == GT_EXPR || cmp == GE_EXPR)
>(with
> {
> - REAL_VALUE_TYPE c2;
> +  REAL_VALUE_TYPE c2;
> +  enum tree_code ncmp = cmp;
>real_arithmetic (&c2, MULT_EXPR,
> &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
>real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
> +  /* See PR91734: if c2 is inexact and sqrt(c2) < c (or sqrt(c2) >= c),
> + then change LT_EXPR into LE_EXPR or GE_EXPR into GT_EXPR.  */
> +  if ((cmp == LT_EXPR || cmp == GE_EXPR) && !REAL_VALUE_ISINF (c2))
> +{
> +  tree c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
> + build_real (TREE_TYPE (@0), c2));
> +  if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
> +ncmp = ERROR_MARK;
> +  else if (real_less (&TREE_REAL_CST (c3), &TREE_REAL_CST (@1)))
> +ncmp = cmp == LT_EXPR ? LE_EXPR : GT_EXPR;
> +}
> }
> -   (if (REAL_VALUE_ISINF (c2))
> - /* sqrt(x) > y is x == +Inf, when y is very large.  */
> - (if (HONOR_INFINITIES (@0))
> -  (eq @0 { build_real (TREE_TYPE (@0), c2); })
> -  { constant_boolean_node (false, type); })
> - /* sqrt(x) > c is the same as x > c*c.  */
> - (cmp @0 { build_real (TREE_TYPE (@0), c2); }
> - (if (cmp == LT_EXPR || cmp == LE_EXPR)
> -  (with
> -   {
> -  REAL_VALUE_TYPE c2;
> -  real_arithmetic (&c2, MULT_EXPR,
> -   &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
> -  real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
> -   }
> -   (if (REAL_VALUE_ISINF (c2))
> -(switch
> -  /* sqrt(x) < y is always true, when y is a very large
> - value and we don't care about NaNs or Infinities.  */
> -  (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
> -   { constant_boolean_node (true, type); })
> -  /* sqrt(x) < y is x != +Inf when y is very large and we
> - don't care about NaNs.  */
> -  (if (! HONOR_NANS (@0))
> -   (ne @0 { build_real (TREE_TYPE (@0), c2); }))
> -  /* sqrt(x) < y is x >= 0 when y is very large and we
> - don't care about Infinities.  */
> -  (if (! HONOR_INFINITIES (@0))
> -   (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
> -  /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
> -  (if (GENERIC)
> -   (truth_andif
> -(ge @0 { build_real (TREE_TYPE (@0), dconst0); })
> -(ne @0 { build_real (TREE_TYPE (@0), c2); }
> - /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
> - (if (! HONOR_NANS (@0))
> -  (cmp @0 { build_real 

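The boundary case being fixed is easy to reproduce standalone; a minimal
sketch, with c chosen small enough that c*c underflows to +0.0 (build
with -lm):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double c = 0x1p-600;   /* c*c = 2^-1200 underflows to +0.0 */
  double c2 = c * c;
  double x = 0.0;
  /* sqrt (0.0) == 0.0 is less than c, so the original form is true,  */
  printf ("sqrt(x) < c : %d\n", sqrt (x) < c);   /* prints 1 */
  /* but the naive fold x < c*c is false at the boundary,  */
  printf ("x < c2      : %d\n", x < c2);         /* prints 0 */
  /* hence the fold must use <= when c2 is inexact (PR 91734).  */
  printf ("x <= c2     : %d\n", x <= c2);        /* prints 1 */
  return 0;
}
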
Re: [PATCH 3/9] operand_equal_p: add support for OBJ_TYPE_REF.

2019-09-16 Thread Richard Biener
On Fri, Sep 13, 2019 at 2:30 PM Martin Liška  wrote:
>
> On 8/20/19 8:39 AM, Richard Biener wrote:
> > Anyhow, the original patch is OK if you compare
> > OBJ_TYPE_REF_TOKEN/OBJECT irrespective of virtual_method_call_p
> > and order the types_same_for_odr last since that's most expensive.
>
> Hi.
>
> It's done in the attached patch that survives bootstrap and regression
> tests.

OK.

Richard.

> Martin
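
For reference, the comparison order the review asks for (cheap
OBJ_TYPE_REF field checks first, the expensive ODR type check last) is
roughly the following; this is a sketch, not the committed code, and
obj_type_refs_equal is a made-up helper name:

/* Compare the OBJ_TYPE_REF token and object operands unconditionally,
   then run the costly types_same_for_odr test only when they match.  */
static bool
obj_type_refs_equal (tree t1, tree t2)
{
  return operand_equal_p (OBJ_TYPE_REF_TOKEN (t1),
  OBJ_TYPE_REF_TOKEN (t2), 0)
 && operand_equal_p (OBJ_TYPE_REF_OBJECT (t1),
 OBJ_TYPE_REF_OBJECT (t2), 0)
 && types_same_for_odr (obj_type_ref_class (t1),
obj_type_ref_class (t2));
}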