Re: [PATCH 04/11] cris: Update unexpected empty split condition

2021-06-02 Thread Kewen.Lin via Gcc-patches
Hi Nilsson,

on 2021/6/2 下午8:45, Hans-Peter Nilsson wrote:
>> From: Kewen Lin 
>> Date: Wed, 2 Jun 2021 07:04:54 +0200
> 
>> gcc/ChangeLog:
>>
>>  * config/cris/cris.md (*addi_reload): Fix empty split condition.
>> ---
>>  gcc/config/cris/cris.md | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
>> index 7de0ec63fcf..d5a3c703a83 100644
>> --- a/gcc/config/cris/cris.md
>> +++ b/gcc/config/cris/cris.md
>> @@ -1311,7 +1311,7 @@ (define_insn_and_split "*addi_reload"
>> && (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)
>> && (reload_in_progress || reload_completed)"
>>"#"
>> -  ""
>> +  "&& 1"
>>[(set (match_dup 0)
>>  (plus:SI (ashift:SI (match_dup 2) (match_dup 3)) (match_dup 1)))]
>>"operands[3] = operands[3] == const2_rtx ? const1_rtx : const2_rtx;")
>> -- 
>> 2.17.1
>>
> 
> Ok, thanks, if only for all-round consistency.
> 
> In preparation for a warning for an empty condition?  I'm
> usually all for .md-warnings, but I'm not sure about the
> benefit of that one, though.  Those "&& 1" look...hackish.

Thanks!  Yeah, the 01/11 patch aims to raise one error message
for the define_insn_and_split whose split condition is empty
while insn condition isn't.  In most cases, when we write one
define_insn_and_split we want the splitting only to take effect
while we see the define_insn matching happen (insn cond holds),
but if we leave the split condition empty, the splitting will
always be done, which could result in unexpected consequences.
Mostly this is unintentional.  The error message is meant to keep
people from doing this unintentionally.

As you may have seen from the discussion under the 00/11 thread,
we will probably end up with some other solution, so I will hold
the changes for the ports, sorry for wasting your time and the
other port maintainers'.

BR,
Kewen


[PATCH] Improve match_simplify_replacement in phi-opt

2021-06-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This improves match_simplify_replacement in phi-opt to handle the
case where there is one cheap (non-call) preparation statement in the
middle basic block, similar to xor_replacement and others.
This allows removing xor_replacement, which this patch also does.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

Changes since v1:
v2 - change the check on the preparation statement to
allow only assignments and no calls and only assignments
that feed into the phi.

gcc/ChangeLog:

PR tree-optimization/25290
* tree-ssa-phiopt.c (xor_replacement): Delete.
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
(match_simplify_replacement): Allow one cheap preparation
statement that can be moved to before the if.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~
happens on the outside of the bit_xor.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c |   4 +-
 gcc/tree-ssa-phiopt.c | 164 +++---
 2 files changed, 54 insertions(+), 114 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
index a2770e5e896..2e86620da11 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
@@ -1,9 +1,9 @@
 /* PR tree-optimization/96928 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
 /* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } 
} */
 /* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } 
*/
-/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 
"phiopt2" } } */
 /* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
 
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 969b868397e..ab852ea1ad4 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "tree-pass.h"
 #include "ssa.h"
+#include "tree-ssa.h"
 #include "optabs-tree.h"
 #include "insn-config.h"
 #include "gimple-pretty-print.h"
@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 edge, edge, gphi *, tree, tree);
-static bool xor_replacement (basic_block, basic_block,
-edge, edge, gphi *, tree, tree);
 static bool spaceship_replacement (basic_block, basic_block,
   edge, edge, gphi *, tree, tree);
 static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
@@ -352,9 +351,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
do_hoist_loads, bool early_p)
cfgchanged = true;
  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
-  && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-   cfgchanged = true;
  else if (!early_p
   && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -801,14 +797,51 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   edge true_edge, false_edge;
   gimple_seq seq = NULL;
   tree result;
-
-  if (!empty_block_p (middle_bb))
-return false;
+  gimple *stmt_to_move = NULL;
 
   /* Special case A ? B : B as this will always simplify to B. */
   if (operand_equal_for_phi_arg_p (arg0, arg1))
 return false;
 
+  /* If the basic block only has a cheap preparation statement,
+ allow it and move it once the transformation is done. */
+  if (!empty_block_p (middle_bb))
+{
+  stmt_to_move = last_and_only_stmt (middle_bb);
+  if (!stmt_to_move)
+   return false;
+
+  if (gimple_vuse (stmt_to_move))
+   return false;
+
+  if (gimple_could_trap_p (stmt_to_move)
+ || gimple_has_side_effects (stmt_to_move))
+   return false;
+
+  if (gimple_uses_undefined_value_p (stmt_to_move))
+   return false;
+
+  /* Allow assignments and not no calls.
+As const calls don't match any of the above, yet they could
+still have some side-effects - they could contain
+gimple_could_trap_p statements, like floating point
+exceptions or integer division by zero.  See PR70586.
+FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
+should handle this.  */
+  if (!is_gimple_assign (stmt_to_move))
+   return false;
+
+  tree lhs = gimple_get_lhs 

Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Kewen.Lin via Gcc-patches
Hi Richi/Richard/Jeff/Segher,

Thanks for the comments!

on 2021/6/3 上午7:52, Segher Boessenkool wrote:
> On Wed, Jun 02, 2021 at 06:32:13PM +0100, Richard Sandiford wrote:
>> Richard Biener  writes:
>>> So what Richard suggests would be to disallow split conditions
>>> that do not start with "&& ", it's probably easy to do that as well
>>> and look for build fails.  That should catch all cases to look at.
>>
>> Yeah.  As a strawman proposal, how about:
>>
>> - add a new "define_independent_insn_and_split" that has the
>>   current semantics of define_insn_and_split.  This should be
>>   mechanical.
> 
> I'd rather not have that -- we can just write separate define_insn and
> define_split in that case.
> 

Not sure if someone would argue that he/she would like to go with one shared
pattern as before, to avoid any possible differences between two separate
patterns and to have good maintainability (like only editing in one place) and
slightly better efficiency.

> How many such cases *are* there?  There are no users exposed to this,
> and when the split condition is required to start with "&&" (instead of
> getting that implied) it is not a silent change ever, either.
> 

If I read the proposal right, the explicit "&&" is only required when going
to find all potentially problematic places for the final implied "&&" change.
But an explicit "&&" does offer good readability.

>> - find the define_insn_and_splits that are missing the "&&", and where
>>   missing the "&&" might make a difference.  Change them to
>>   define_independent_insn_and_splits.
>>
>>   Like Richard says, this can be done by temporarily disallowing
>>   define_insn_and_splits that have no "&&".
> 
> If we make that change permanently, that is all steps we ever need!
> 

So the question is that: whether we need to demand an explicit "&&".
Richard's proposal is for answer "no" which aligns with Richi's auto
filling advice before.  I think it would result in fewer changes since
those places without explicit "&&" are mostly unintentional, all the jobs
are done by implied "&&".  Its downside seems to be bad readability: new
readers may take it as two separate conditions at first glance, but I
guess if we emphasize this change in the documentation it would be fine?
Or emitting one warning if missing an explicit "&&"?

BR,
Kewen
 
> Very old backends use the same insn condition and split condition
> sometimes still; it isn't hard to detect that as well, if that seems
> prudent.
> 
> 
> Segher
> 


Re: [PATCH,rs6000] Fix operand order to subf for p10 fusion.

2021-06-02 Thread Segher Boessenkool
Hi!

On Wed, Jun 02, 2021 at 11:05:00PM -0500, Aaron Sawdey wrote:
> This certainly causes a bootstrap miscompare, and might also be
> responsible for PR/100820. The operands to subf were reversed
> in the logical-add/sub fusion patterns, and I screwed up my
> bootstrap test which is how it ended up getting committed.
> 
> If bootstrap and regtest passes, ok for trunk (and eventual backport to 11.2)?

Yes for all.  Thanks!

Super nit:

>   * gcc/config/rs6000/genfusion.pl (gen_logical_addsubf): Fix input
>   order to subf instruction.
>   * gcc/config/rs6000/fusion.md: Regenerate input.

It is not the *input* you regenerated ;-)  Just "Regenerate." is custom.


Segher


[PATCH,rs6000] Fix operand order to subf for p10 fusion.

2021-06-02 Thread Aaron Sawdey via Gcc-patches
This certainly causes a bootstrap miscompare, and might also be
responsible for PR/100820. The operands to subf were reversed
in the logical-add/sub fusion patterns, and I screwed up my
bootstrap test which is how it ended up getting committed.

If bootstrap and regtest passes, ok for trunk (and eventual backport to 11.2)?

Thanks!
   Aaron

gcc/ChangeLog

* gcc/config/rs6000/genfusion.pl (gen_logical_addsubf): Fix input
order to subf instruction.
* gcc/config/rs6000/fusion.md: Regenerate input.
---
 gcc/config/rs6000/fusion.md| 64 +-
 gcc/config/rs6000/genfusion.pl | 20 ++-
 2 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 51912106663..e642ff5f95f 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1733,10 +1733,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   and %3,%1,%0\;subf %3,%3,%2
-   and %3,%1,%0\;subf %3,%3,%2
-   and %3,%1,%0\;subf %3,%3,%2
-   and %4,%1,%0\;subf %3,%4,%2"
+   and %3,%1,%0\;subf %3,%2,%3
+   and %3,%1,%0\;subf %3,%2,%3
+   and %3,%1,%0\;subf %3,%2,%3
+   and %4,%1,%0\;subf %3,%2,%4"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1751,10 +1751,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   nand %3,%1,%0\;subf %3,%3,%2
-   nand %3,%1,%0\;subf %3,%3,%2
-   nand %3,%1,%0\;subf %3,%3,%2
-   nand %4,%1,%0\;subf %3,%4,%2"
+   nand %3,%1,%0\;subf %3,%2,%3
+   nand %3,%1,%0\;subf %3,%2,%3
+   nand %3,%1,%0\;subf %3,%2,%3
+   nand %4,%1,%0\;subf %3,%2,%4"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1769,10 +1769,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   nor %3,%1,%0\;subf %3,%3,%2
-   nor %3,%1,%0\;subf %3,%3,%2
-   nor %3,%1,%0\;subf %3,%3,%2
-   nor %4,%1,%0\;subf %3,%4,%2"
+   nor %3,%1,%0\;subf %3,%2,%3
+   nor %3,%1,%0\;subf %3,%2,%3
+   nor %3,%1,%0\;subf %3,%2,%3
+   nor %4,%1,%0\;subf %3,%2,%4"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1787,10 +1787,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   or %3,%1,%0\;subf %3,%3,%2
-   or %3,%1,%0\;subf %3,%3,%2
-   or %3,%1,%0\;subf %3,%3,%2
-   or %4,%1,%0\;subf %3,%4,%2"
+   or %3,%1,%0\;subf %3,%2,%3
+   or %3,%1,%0\;subf %3,%2,%3
+   or %3,%1,%0\;subf %3,%2,%3
+   or %4,%1,%0\;subf %3,%2,%4"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1805,10 +1805,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   and %3,%1,%0\;subf %3,%2,%3
-   and %3,%1,%0\;subf %3,%2,%3
-   and %3,%1,%0\;subf %3,%2,%3
-   and %4,%1,%0\;subf %3,%2,%4"
+   and %3,%1,%0\;subf %3,%3,%2
+   and %3,%1,%0\;subf %3,%3,%2
+   and %3,%1,%0\;subf %3,%3,%2
+   and %4,%1,%0\;subf %3,%4,%2"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1823,10 +1823,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   nand %3,%1,%0\;subf %3,%2,%3
-   nand %3,%1,%0\;subf %3,%2,%3
-   nand %3,%1,%0\;subf %3,%2,%3
-   nand %4,%1,%0\;subf %3,%2,%4"
+   nand %3,%1,%0\;subf %3,%3,%2
+   nand %3,%1,%0\;subf %3,%3,%2
+   nand %3,%1,%0\;subf %3,%3,%2
+   nand %4,%1,%0\;subf %3,%4,%2"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1841,10 +1841,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   nor %3,%1,%0\;subf %3,%2,%3
-   nor %3,%1,%0\;subf %3,%2,%3
-   nor %3,%1,%0\;subf %3,%2,%3
-   nor %4,%1,%0\;subf %3,%2,%4"
+   nor %3,%1,%0\;subf %3,%3,%2
+   nor %3,%1,%0\;subf %3,%3,%2
+   nor %3,%1,%0\;subf %3,%3,%2
+   nor %4,%1,%0\;subf %3,%4,%2"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
@@ -1859,10 +1859,10 @@
(clobber (match_scratch:GPR 4 "=X,X,X,"))]
   "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)"
   "@
-   or %3,%1,%0\;subf %3,%2,%3
-   or %3,%1,%0\;subf %3,%2,%3
-   or %3,%1,%0\;subf %3,%2,%3
-   or %4,%1,%0\;subf %3,%2,%4"
+   or %3,%1,%0\;subf %3,%3,%2
+   or %3,%1,%0\;subf %3,%3,%2
+   or %3,%1,%0\;subf %3,%3,%2
+   or %4,%1,%0\;subf %3,%4,%2"
   [(set_attr "type" "fused_arith_logical")
(set_attr "cost" "6")
(set_attr "length" "8")])
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index 1285dd42043..577b9553deb 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -166,7 +166,7 @@ sub 

Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 7:05 PM, Patrick Palka wrote:

On Wed, 2 Jun 2021, Jason Merrill wrote:


On 6/2/21 4:56 PM, Patrick Palka wrote:

On Wed, 2 Jun 2021, Patrick Palka wrote:


On Wed, 2 Jun 2021, Jason Merrill wrote:


On 6/2/21 2:39 PM, Patrick Palka wrote:

Here, the dependent template name in the return type of f() resolves
to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from
cp_build_qualified_type
rather than just silently dropping the qualification as per
[dcl.ref]/1.


Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the
alias?


Ah, I hadn't considered investigating that.  It seems make_typename_type
always returns a _TYPE instead of a TYPE_DECL when resolving a dependent
name that's a template-id, regardless of the tf_keep_type_decl flag.
This can be easily fixed like so, and this change alone is sufficient to
fix the PR (no changes to qualttp20.C needed).  Note that this change
should only have an effect when tf_keep_type_decl is passed to
make_typename_type, and the only such caller is the TYPENAME_TYPE case
of tsubst in question, so this change seems pretty safe.

The downside is that we don't get the __restrict__-dropping
"improvement" as exhibited by qualttp20.C that the original patch
provides, so this other approach is more conservative in that sense.

So shall we go with the original patch, or something like the following?
(If we go with the original patch, it just occurred to me that we could
remove tf_keep_type_decl altogether.)  Testing in progress.


For sake of concreteness, here's the full alternative patch for
consideration (modulo ChangeLog):


This seems better.  I think the only non-type return from
lookup_template_class is error_mark_node; does it work to check that
specifically rather than !TYPE_P?


Indeed, checking for error_mark_node instead works nicely.  Does the
following look OK?  Bootstrapped and regtested on x86_64-pc-linux-gnu.


OK.


-- >8 --

Subject: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

Here, the dependent template name in the return type of f() resolves to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from cp_build_qualified_type
rather than just silently dropping the qualification as per [dcl.ref]/1.

The problem is ultimately that make_typename_type ignores the
tf_keep_type_decl flag when the dependent name is a template-id.  This
in turn causes the TYPE_DECL check within tsubst 
to fail, and so we end up not passing tf_ignore_bad_quals to
cp_build_qualified_type.  This patch fixes this by making
make_typename_type respect the tf_keep_type_decl flag even in the case
of a dependent template-id name.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/100592

gcc/cp/ChangeLog:

* decl.c (make_typename_type): After dispatching to
lookup_template_class, adjust the result to its TYPE_NAME
and then consider the tf_keep_type_decl flag.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/alias-decl-71.C: New test.
* g++.dg/template/qualttp20.C: Remove dg-error and augment.
---
  gcc/cp/decl.c  | 13 +
  gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
  2 files changed, 22 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index fb21a3a1ae8..a3687dbb0dd 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4136,10 +4136,15 @@ make_typename_type (tree context, tree name, enum 
tag_types tag_type,
  return error_mark_node;
  
if (want_template)

-return lookup_template_class (t, TREE_OPERAND (fullname, 1),
- NULL_TREE, context,
- /*entering_scope=*/0,
- complain | tf_user);
+{
+  t = lookup_template_class (t, TREE_OPERAND (fullname, 1),
+NULL_TREE, context,
+/*entering_scope=*/0,
+complain | tf_user);
+  if (t == error_mark_node)
+   return error_mark_node;
+  t = TYPE_NAME (t);
+}

if (DECL_ARTIFICIAL (t) || !(complain & tf_keep_type_decl))

  t = TREE_TYPE (t);
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = decltype(f());
+using type = int&;





[PATCH] x86: Convert CONST_WIDE_INT to broadcast in move expanders

2021-06-02 Thread H.J. Lu via Gcc-patches
Update move expanders to convert the CONST_WIDE_INT operand to vector
broadcast from a byte with AVX2.  Add ix86_gen_scratch_sse_rtx to
return a scratch SSE register which won't increase stack alignment
requirement and blocks transformation by the combine pass.

A small benchmark:

https://gitlab.com/x86-benchmarks/microbenchmark/-/tree/memset/broadcast

shows that broadcast is a little bit faster on Intel Core i7-8559U:

$ make
gcc -g -I. -O2   -c -o test.o test.c
gcc -g   -c -o memory.o memory.S
gcc -g   -c -o broadcast.o broadcast.S
gcc -o test test.o memory.o broadcast.o
./test
memory   : 99333
broadcast: 97208
$

broadcast is also smaller:

$ size memory.o broadcast.o
   textdata bss dec hex filename
132   0   0 132  84 memory.o
122   0   0 122  7a broadcast.o
$

gcc/

PR target/100865
* config/i386/i386-expand.c (ix86_expand_vector_init_duplicate):
New prototype.
(ix86_byte_broadcast): New function.
(ix86_convert_const_wide_int_to_broadcast): Likewise.
(ix86_expand_move): Try ix86_convert_const_wide_int_to_broadcast
if mode size is 16 bytes or bigger.
(ix86_expand_vector_move): Try
ix86_convert_const_wide_int_to_broadcast.
* config/i386/i386-protos.h (ix86_gen_scratch_sse_rtx): New
prototype.
* config/i386/i386.c (ix86_minimum_incoming_stack_boundary): Add
an argument to ignore stack_alignment_estimated.  It is passed
as false by default.
(ix86_gen_scratch_sse_rtx): New function.

gcc/testsuite/

PR target/100865
* gcc.target/i386/pr100865-1.c: New test.
* gcc.target/i386/pr100865-2.c: Likewise.
* gcc.target/i386/pr100865-3.c: Likewise.
* gcc.target/i386/pr100865-4.c: Likewise.
* gcc.target/i386/pr100865-5.c: Likewise.
---
 gcc/config/i386/i386-expand.c  | 103 ++---
 gcc/config/i386/i386-protos.h  |   2 +
 gcc/config/i386/i386.c |  50 +-
 gcc/testsuite/gcc.target/i386/pr100865-1.c |  13 +++
 gcc/testsuite/gcc.target/i386/pr100865-2.c |  14 +++
 gcc/testsuite/gcc.target/i386/pr100865-3.c |  15 +++
 gcc/testsuite/gcc.target/i386/pr100865-4.c |  16 
 gcc/testsuite/gcc.target/i386/pr100865-5.c |  17 
 8 files changed, 215 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100865-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100865-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100865-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100865-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100865-5.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 4185f58eed5..658adafa269 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -93,6 +93,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "i386-builtins.h"
 #include "i386-expand.h"
 
+static bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
+  rtx);
+
 /* Split one or more double-mode RTL references into pairs of half-mode
references.  The RTL can be REG, offsettable MEM, integer constant, or
CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
@@ -190,6 +193,65 @@ ix86_expand_clear (rtx dest)
   emit_insn (tmp);
 }
 
+/* Return a byte value which V can be broadcasted from.  Otherwise,
+   return INT_MAX.  */
+
+static int
+ix86_byte_broadcast (HOST_WIDE_INT v)
+{
+  wide_int val = wi::uhwi (v, HOST_BITS_PER_WIDE_INT);
+  int byte_broadcast = wi::extract_uhwi (val, 0, BITS_PER_UNIT);
+  for (unsigned int i = BITS_PER_UNIT;
+   i < HOST_BITS_PER_WIDE_INT;
+   i += BITS_PER_UNIT)
+{
+  int byte = wi::extract_uhwi (val, i, BITS_PER_UNIT);
+  if (byte_broadcast != byte)
+   return INT_MAX;
+}
+  return byte_broadcast;
+}
+
+/* Convert the CONST_WIDE_INT operand OP to broadcast in MODE.  */
+
+static rtx
+ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
+{
+  rtx target = nullptr;
+
+  /* Convert CONST_WIDE_INT to broadcast only if vector broadcast is
+ available.  */
+  if (!TARGET_AVX2 || !CONST_WIDE_INT_P (op))
+return target;
+
+  HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0);
+  int byte_broadcast = ix86_byte_broadcast (val);
+
+  if (byte_broadcast == INT_MAX)
+return target;
+
+  /* Check if OP1 can be broadcasted from VAL.  */
+  for (int i = 1; i < CONST_WIDE_INT_NUNITS (op); i++)
+if (val != CONST_WIDE_INT_ELT (op, i))
+  return target;
+
+  unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode);
+  machine_mode vector_mode;
+  if (!mode_for_vector (QImode, nunits).exists (_mode))
+gcc_unreachable ();
+  target = ix86_gen_scratch_sse_rtx (vector_mode, true);
+  rtx byte = GEN_INT ((char) byte_broadcast);
+  if (!ix86_expand_vector_init_duplicate 

[PATCH v2] predcom: Enabled by loop vect at O2 [PR100794]

2021-06-02 Thread Kewen.Lin via Gcc-patches
Hi Richard,

on 2021/6/3 上午1:19, Richard Sandiford wrote:
> "Kewen.Lin via Gcc-patches"  writes:
>> Hi,
>>
>> As PR100794 shows, in the current implementation PRE bypasses
>> some optimization to avoid introducing loop carried dependence
>> which stops loop vectorizer to vectorize the loop.  At -O2,
>> there is no downstream pass to re-catch this kind of opportunity
>> if loop vectorizer fails to vectorize that loop.
>>
>> This patch follows Richi's suggestion in the PR: if the predcom flag
>> isn't set explicitly, loop vectorization will implicitly enable
>> predcom without any unrolling.  The Power9 SPEC2017 evaluation showed it
>> can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap
>> cost model, no remarkable impact at cheap cost model, the build
>> time and size impact is fine (see the PR for the details).
>>
>> By the way, I tested another proposal to guard PRE not skip the
>> optimization for cheap and very-cheap vect cost models, the
>> evaluation results showed it's fine with very cheap cost model,
>> but it can degrade some bmks like 521.wrf_r -9.17% and
>> 549.fotonik3d_r -2.07% etc.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
>> x86_64-redhat-linux and aarch64-linux-gnu.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>>  PR tree-optimization/100794
>>  * tree-predcom.c (tree_predictive_commoning_loop): Add parameter
>>  allow_unroll_p and only allow unrolling when it's true.
>>  (tree_predictive_commoning): Add parameter allow_unroll_p and
>>  adjust for it.
>>  (run_tree_predictive_commoning): Likewise.
>>  (class pass_predcom): Add private member allow_unroll_p.
>>  (pass_predcom::pass_predcom): Init allow_unroll_p.
>>  (pass_predcom::gate): Check flag_tree_loop_vectorize and 
>>  global_options_set.x_flag_predictive_commoning.
>>  (pass_predcom::execute): Adjust for allow_unroll_p.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR tree-optimization/100794
>>  * gcc.dg/tree-ssa/pr100794.c: New test.
>>
>>  gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +
>>  gcc/tree-predcom.c   | 57 +---
>>  2 files changed, 60 insertions(+), 17 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
>> new file mode 100644
>> index 000..6f707ae7fba
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details 
>> -fdisable-tree-vect" } */
>> +
>> +extern double arr[100];
>> +extern double foo (double, double);
>> +extern double sum;
>> +
>> +void
>> +test (int i_0, int i_n)
>> +{
>> +  int i;
>> +  for (i = i_0; i < i_n - 1; i++)
>> +{
>> +  double a = arr[i];
>> +  double b = arr[i + 1];
>> +  sum += a * b;
>> +}
>> +}
>> +
>> +/* { dg-final { scan-tree-dump "Executing predictive commoning without 
>> unrolling" "pcom" } } */
>> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
>> index 02f911a08bb..65a93c8e505 100644
>> --- a/gcc/tree-predcom.c
>> +++ b/gcc/tree-predcom.c
>> @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec 
>> chains)
>> applied to this loop.  */
>>  
>>  static unsigned
>> -tree_predictive_commoning_loop (class loop *loop)
>> +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
>>  {
>>vec datarefs;
>>vec dependences;
>>struct component *components;
>>vec chains = vNULL;
>> -  unsigned unroll_factor;
>> +  unsigned unroll_factor = 0;
>>class tree_niter_desc desc;
>>bool unroll = false, loop_closed_ssa = false;
>>  
>> @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop)
>>dump_chains (dump_file, chains);
>>  }
>>  
>> -  /* Determine the unroll factor, and if the loop should be unrolled, ensure
>> - that its number of iterations is divisible by the factor.  */
>> -  unroll_factor = determine_unroll_factor (chains);
>> -  unroll = (unroll_factor > 1
>> -&& can_unroll_loop_p (loop, unroll_factor, ));
>> +  if (allow_unroll_p)
>> +/* Determine the unroll factor, and if the loop should be unrolled, 
>> ensure
>> +   that its number of iterations is divisible by the factor.  */
>> +unroll_factor = determine_unroll_factor (chains);
>> +
>> +  if (unroll_factor > 1)
>> +unroll = can_unroll_loop_p (loop, unroll_factor, );
>>  
>>/* Execute the predictive commoning transformations, and possibly unroll 
>> the
>>   loop.  */
>> @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop)
>>  /* Runs predictive commoning.  */
>>  
>>  unsigned
>> -tree_predictive_commoning (void)
>> +tree_predictive_commoning (bool allow_unroll_p)
>>  {
>>class loop *loop;
>>unsigned ret = 0, changed = 0;
>> @@ -3328,7 +3330,7 @@ tree_predictive_commoning 

Re: [PATCH 2/2] rs6000: Add test for _mm_minpos_epu16

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 05:13:16PM -0500, Paul A. Clarke wrote:
> +  for (i = 0; i < NUM; i++)
> +src.s[i] = i * i - 68 * i + 1200;

Could you do tests with some identical elements as well?  Because that
is where I think it fails on BE currently.


Segher


Re: [PATCH 2/2, rs6000] Remove mode promotion for pseudos

2021-06-02 Thread HAO CHEN GUI via Gcc-patches

Hi,

  Gentle ping this:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570854.html

Thanks.

On 20/5/2021 下午 5:49, HAO CHEN GUI wrote:

Hi,

   The patch removes mode promotion for pseudos on rs6000 target.

   The attachments are the patch diff and change log file.

    Bootstrapped and tested on powerpc64le-linux and powerpc64-linux 
(with both m32 and m64) with no regressions. Is this okay for trunk? 
Any recommendations? Thanks a lot.




Re: [PATCH] rs6000: Support doubleword swaps removal in rot64 load store [PR100085]

2021-06-02 Thread Xionghu Luo via Gcc-patches
Hi,

On 2021/6/3 06:20, Segher Boessenkool wrote:
> On Wed, Jun 02, 2021 at 03:19:32AM -0500, Xionghu Luo wrote:
>> On P8LE, extra rot64+rot64 load or store instructions are generated
>> in float128 to vector __int128 conversion.
>>
>> This patch teaches pass swaps to also handle such patterns to remove
>> extra swap instructions.
> 
> Did you check if this is already handled by simplify-rtx if the mode had
> been TImode (not V1TImode)?  If not, why do you not handle it there?

I tried to do it in combine or peephole, but the later split2
or split3 pass will still split it into rotate + rotate again, as we have a
split after reload; this pattern is also quite P8LE specific, so I put it in
the swap pass.  simplify-rtx can already simplify
r124:KF#0=r123:KF#0<-<0x40<-<0x40 to r124:KF#0=r123:KF#0 for register
operations.


vsx.md:

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
(match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], mode)"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], mode);
  DONE;
})

 
Thanks,
Xionghu


Re: match.pd: ~X & Y to X ^ Y in some cases

2021-06-02 Thread Andrew Pinski via Gcc-patches
On Fri, May 13, 2016 at 12:07 PM Marc Glisse  wrote:
>
> Hello,
>
> maybe this would fit better in VRP, but it is easier (and not completely
> useless) to put it in match.pd.
>
> Since the transformation is restricted to GIMPLE, I think I don't need to
> check that @0 is SSA_NAME. I didn't test if @0 has pointer type before
> calling get_range_info because we are doing bit_not on it, but it looks
> like I should because we can do bitops on pointers?

I just noticed this was PR 52254 also :).  I closed it as fixed after
putting in a reference to the revision this was committed.

Thanks,
Andrew

>
> Adjustment for pr69270.c is exactly the same as in the previous patch from
> today :-)
>
> Bootstrap+regtest on powerpc64le-unknown-linux-gnu.
>
>
> 2016-05-16  Marc Glisse  
>
> gcc/
> * match.pd (~X & Y): New transformation.
>
> gcc/testsuite/
> * gcc.dg/tree-ssa/pr69270.c: Adjust.
> * gcc.dg/tree-ssa/andnot-1.c: New testcase.
>
>
> --
> Marc Glisse


Re: [PATCH 1/2] rs6000: Add support for _mm_minpos_epu16

2021-06-02 Thread Segher Boessenkool
Hi!

On Wed, Jun 02, 2021 at 05:13:15PM -0500, Paul A. Clarke wrote:
> Add a naive implementation of the subject x86 intrinsic to
> ease porting.

> +/* Return horizontal packed word minimum and its index in bits [15:0]
> +   and bits [18:16] respectively.  */
> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
> __artificial__))
> +_mm_minpos_epu16 (__m128i __A)
> +{
> +  union __u
> +{
> +  __m128i __m;
> +  __v8hu __uh;
> +};
> +  union __u __u = { .__m = __A }, __r = { .__m = {0} };
> +  unsigned short __ridx = 0;
> +  unsigned short __rmin = __u.__uh[__ridx];
> +  for (unsigned long __i = __ridx+1;

(spaces around the "+"?)

> +   __i < sizeof (__u.__uh) / sizeof (__u.__uh[0]);

You should either use a macro for that, or just write "8" :-)

> +   __i++)
> +{
> +  if (__u.__uh[__i] < __rmin)
> +{
> +  __rmin = __u.__uh[__i];
> +  __ridx = __i;
> +}
> +}
> +  __r.__uh[0] = __rmin;
> +  __r.__uh[1] = __ridx;
> +  return __r.__m;
> +}

This does not compute the index correctly for big endian (it needs to
walk from right to left for that).  The construction of the return value
looks wrong as well.

Okay for trunk with that fixed.  Thanks!


Segher


Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 06:32:13PM +0100, Richard Sandiford wrote:
> Richard Biener  writes:
> > So what Richard suggests would be to disallow split conditions
> > that do not start with "&& ", it's probably easy to do that as well
> > and look for build fails.  That should catch all cases to look at.
> 
> Yeah.  As a strawman proposal, how about:
> 
> - add a new "define_independent_insn_and_split" that has the
>   current semantics of define_insn_and_split.  This should be
>   mechanical.

I'd rather not have that -- we can just write separate define_insn and
define_split in that case.

How many such cases *are* there?  There are no users exposed to this,
and when the split condition is required to start with "&&" (instead of
getting that implied) it is not a silent change ever, either.

> - find the define_insn_and_splits that are missing the "&&", and where
>   missing the "&&" might make a difference.  Change them to
>   define_independent_insn_and_splits.
> 
>   Like Richard says, this can be done by temporarily disallowing
>   define_insn_and_splits that have no "&&".

If we make that change permanently, that is all steps we ever need!

Very old backends use the same insn condition and split condition
sometimes still; it isn't hard to detect that as well, if that seems
prudent.


Segher


Re: [PATCH 01/11] gen: Emit error msg for empty split condition

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 04:18:46PM +0800, Kewen.Lin wrote:
> on 2021/6/2 下午3:43, Richard Biener wrote:
> Yes, the "" in split condition does mean 'true' (always).

Right -- which means it will be split whenever it matches.  This *can*
be intended, but in define_insn_and_split it is almost always a simple
bug.

> > Also "" as split condition _does_
> > seem valid, just maybe unintended?  
> 
> Yes, it's valid without this patch.  That's why I asked whether there is
> some good reason to keep it be [1].  In Segher's opinion, there is no
> good reason, he pointed out "A reader does not expect a
> define_insn_and_split to split any other insns."

Yes, but considering plain define_split, it can be wanted, esp. in
simpler backends that do not have a lot of historical baggage.

> > How would one create a
> > functionally equivalent example? "|| 1" will likely not work.
> 
> I think "|| 1" works just like "" if people want the define_split to
> split all the time, even with this patch.

Except "|| 1" is a syntax error.

You can always write just "1".

> > Note I'm not familiar with all the details here but the documentation
> > does seem ambiguous at best, not supporting to error on empty
> > split-conditions at least.
> 
> Yes, the current patch will stop the "" condition which was accepted
> before.  Thanks for bringing this up!  We have to update the
> documentation if people reach a consensus.

It will help if the error message tells you
  If this is what you intended, write "1".
or similar.  No more documentation is needed then :-)


Segher


Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Patrick Palka via Gcc-patches
On Wed, 2 Jun 2021, Jason Merrill wrote:

> On 6/2/21 4:56 PM, Patrick Palka wrote:
> > On Wed, 2 Jun 2021, Patrick Palka wrote:
> > 
> > > On Wed, 2 Jun 2021, Jason Merrill wrote:
> > > 
> > > > On 6/2/21 2:39 PM, Patrick Palka wrote:
> > > > > Here, the dependent template name in the return type of f() resolves
> > > > > to
> > > > > an alias of int& after substitution, and we end up complaining about
> > > > > qualifying this reference type with 'const' from
> > > > > cp_build_qualified_type
> > > > > rather than just silently dropping the qualification as per
> > > > > [dcl.ref]/1.
> > > > 
> > > > Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the
> > > > alias?
> > > 
> > > Ah, I hadn't considered investigating that.  It seems make_typename_type
> > > always returns a _TYPE instead of a TYPE_DECL when resolving a dependent
> > > name that's a template-id, regardless of the tf_keep_type_decl flag.
> > > This can be easily fixed like so, and this change alone is sufficient to
> > > fix the PR (no changes to qualttp20.C needed).  Note that this change
> > > should only have an effect when tf_keep_type_decl is passed to
> > > make_typename_type, and the only such caller is the TYPENAME_TYPE case
> > > of tsubst in question, so this change seems pretty safe.
> > > 
> > > The downside is that we don't get the __restrict__-dropping
> > > "improvement" as exhibited by qualttp20.C that the original patch
> > > provides, so this other approach is more conservative in that sense.
> > > 
> > > So shall we go with the original patch, or something like the following?
> > > (If we go with the original patch, it just occurred to me that we could
> > > remove tf_keep_type_decl altogether.)  Testing in progress.
> > 
> > For sake of concreteness, here's the full alternative patch for
> > consideration (modulo ChangeLog):
> 
> This seems better.  I think the only non-type return from
> lookup_template_class is error_mark_node; does it work to check that
> specifically rather than !TYPE_P?

Indeed, checking for error_mark_node instead works nicely.  Does the
following look OK?  Bootstrapped and regtested on x86_64-pc-linux-gnu.

-- >8 --

Subject: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

Here, the dependent template name in the return type of f() resolves to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from cp_build_qualified_type
rather than just silently dropping the qualification as per [dcl.ref]/1.

The problem is ultimately that make_typename_type ignores the
tf_keep_type_decl flag when the dependent name is a template-id.  This
in turn causes the TYPE_DECL check within tsubst 
to fail, and so we end up not passing tf_ignore_bad_quals to
cp_build_qualified_type.  This patch fixes this by making
make_typename_type respect the tf_keep_type_decl flag even in the case
of a dependent template-id name.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/100592

gcc/cp/ChangeLog:

* decl.c (make_typename_type): After dispatching to
lookup_template_class, adjust the result to its TYPE_NAME
and then consider the tf_keep_type_decl flag.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/alias-decl-71.C: New test.
* g++.dg/template/qualttp20.C: Remove dg-error and augment.
---
 gcc/cp/decl.c  | 13 +
 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
 2 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index fb21a3a1ae8..a3687dbb0dd 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4136,10 +4136,15 @@ make_typename_type (tree context, tree name, enum 
tag_types tag_type,
 return error_mark_node;
 
   if (want_template)
-return lookup_template_class (t, TREE_OPERAND (fullname, 1),
- NULL_TREE, context,
- /*entering_scope=*/0,
- complain | tf_user);
+{
+  t = lookup_template_class (t, TREE_OPERAND (fullname, 1),
+NULL_TREE, context,
+/*entering_scope=*/0,
+complain | tf_user);
+  if (t == error_mark_node)
+   return error_mark_node;
+  t = TYPE_NAME (t);
+}
   
   if (DECL_ARTIFICIAL (t) || !(complain & tf_keep_type_decl))
 t = TREE_TYPE (t);
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = 

Re: [PATCH] rtl: constm64_rtx..const64_rtx

2021-06-02 Thread Segher Boessenkool
Hi!

On Wed, Jun 02, 2021 at 06:07:28PM +0100, Richard Sandiford wrote:
> Segher Boessenkool  writes:
> > Since times immemorial there has been const_int_rtx for all values from
> > -64 to 64, but only constm1_rtx..const2_rtx have been available for
> > convenient use.  Change this, so that we can use all values in
> > {-64,...,64} in RTL easily.  This matters, because then we can just say
> >   if (XEXP (x, 1) == const16_rtx)
> > and things like that, since all const_int in that range are unique.  We
> > already do for -1, 0, 1, 2, but we could for everything.

> No strong objection, but personally I'd rather not add something
> that is very specific to VOIDmode CONST_INTs.  I realise it's very
> unlikely that we'll ever be able to give CONST_INTs their proper mode
> (no-one has the kind of time needed to do that), but I don't think we
> should make the switch actively harder either.

How does this make that harder?

Having no mode for CONST_INTs makes some things significantly *easier*
btw.  Well you know that, that is what makes any conversion away from
this so much harder :-)

We have had const0_rtx etc. since forever, this patch just increases the
range (to those values that have had guaranteed unique RTXes since
decades as well).

> How about adding a new inline helper function that tests whether an
> rtx is a CONST_INT with a given value?  Then we could have a
> __builtin_constant_p shortcut for the [-64, 64] case.  That would
> also avoid hard-coding the range.

Currently you have to write the example as
  if (XEXP (x, 1) == const_int_rtx[MAX_SAVED_CONST_INT + 16])
and with your suggestion it will be
  if (is_const_int_with_value (XEXP (x, 1), 16))

I like
  if (XEXP (x, 1) == const16_rtx)
better: it is shorter and clearer (often you have something like this in
more complex conditionals, it matters), and this pattern is already very
well known (for -1, 0, 1, 2).

Do you like this patch a bit better if I also add such an
is_const_int_with_value function?


Segher


[Patch] Fortran/OpenMP: Add omp loop [PR99928]

2021-06-02 Thread Tobias Burnus

This patch adds support for 'omp loop' to gfortran including the combined
constructs. It also fixes some splitting issues with clauses in
combined constructs.

It does not attempt to clean up all remaining Fortran issues with
clauses in combined constructs (cf. below + PR).

 * * *

Since 'parallel loop' is now supported, using
 !$omp parallel &
 !$acc& loop
no longer gave an error. (Same result before: '!$acc& do'.)

With the current unpatched parser, there is either everything
successfully parsed – and processed – or it fails with
MATCH_NO and gfortran tries it again with another different
match attempt. The error is cached and only shown, when
still pending and nothing else worked.
Using gfc_error_now prevents the delayed/conditional output
but as several matching attempts are done, the error is shown
half a dozen times.

I played around, but I then determined that it
is the simplest to abort with a fatal error in this case.
Hence, I split the testcase into three – and placed it into the
goacc-gomp directory (where it belongs to but which did not
exist for quite some time).
(The early parse errors + the first fatal error can be in one
file, the next fatal error needs an extra file and
resolution-time errors must be yet in another file.)
As the name 'omp*.f*' did not fit any more, I renamed it.


Plus:
  !$omp foo
  !$acc bar
is not a continuation line – which was mishandled in
fixed-form Fortran (cf. mixed-omp-3.f + scanner.c)
- causing a bogus error as no continuation line was involved.

Not in this testcase, but if the first statement is either
an '!$xxx end ...' or a single-statement where nothing
follows, this could prevent valid fixed-form code from
running ...

 * * *

This patch includes all Fortranized pr99928-3.c testcases, except for
pr99928-9.c and pr99928-10.c, which use array sections in 'reduction'.
Additionally, 'defaultmap(none)' is currently commented as it is not yet
supported.

Regarding the xfails: There are some shared() ones and the target map(tofrom),
which I did not attempt to fix in this patch. And I observe the following
fails in pr99928-7.f90:

The check does (for Fortran – and alike for C):
  ! { dg-final { scan-tree-dump-not "omp distribute\[^\n\r]*lastprivate\\(j00\\)" 
"gimple" } }
  ! { dg-final { scan-tree-dump-not "omp parallel\[^\n\r]*lastprivate\\(j00\\)" 
"gimple" } }
  ! { dg-final { scan-tree-dump-not "omp for\[^\n\r]*lastprivate\\(j00\\)" 
"gimple" } }
  ! { dg-final { scan-tree-dump "omp simd\[^\n\r]*linear\\(j00:1\\)" "gimple" } 
}
  !$omp distribute parallel do simd default(none)
  do j00 = 1, 64
  end do

While C generates (original dump)

  #pragma omp distribute
  #pragma omp parallel default(none)
#pragma omp for nowait
#pragma omp simd

Fortran generates the same except for:
#pragma omp simd linear(j00:1)

The latter leads to a different result for the gimple dump. Fortran has:
  #pragma omp distribute private(j00.1)
#pragma omp parallel default(none) lastprivate(j00)
#pragma omp for nowait private(j00.0)
  #pragma omp simd linear(j00:1)
while C has the same without 'lastprivate(j00)'.

And the dump checks that 'lastprivate(j00)' does not appear ...

 * * *

OK? – Comments, remarks?

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
Fortran/OpenMP: Add omp loop [PR99928]

	PR middle-end/99928

gcc/fortran/ChangeLog:

	* dump-parse-tree.c (show_omp_clauses): Handle bind clause.
	(show_omp_node): Handle loop directive.
	* frontend-passes.c (gfc_code_walker): Likewise.
	* gfortran.h (enum gfc_statement): Add
	ST_OMP_(END_)(TARGET_)(|PARALLEL_|TEAMS_)LOOP.
	(enum gfc_omp_bind_type): New.
	(gfc_omp_clauses): Use it.
	(enum gfc_exec_op): Add EXEC_OMP_(TARGET_)(|PARALLEL_|TEAMS_)LOOP.
	* match.h (gfc_match_omp_loop, gfc_match_omp_parallel_loop,
	gfc_match_omp_target_parallel_loop, gfc_match_omp_target_teams_loop,
	gfc_match_omp_teams_loop): New.
	* openmp.c (enum omp_mask1): Add OMP_CLAUSE_BIND.
	(gfc_match_omp_clauses): Handle it.
	(OMP_LOOP_CLAUSES, gfc_match_omp_loop, gfc_match_omp_teams_loop,
	gfc_match_omp_target_teams_loop, gfc_match_omp_parallel_loop,
	gfc_match_omp_target_parallel_loop): New.
	(resolve_omp_clauses, resolve_omp_do, omp_code_to_statement,
	gfc_resolve_omp_directive): Handle omp loop.
	* parse.c (decode_omp_directive case_exec_markers, gfc_ascii_statement,
	parse_omp_do, parse_executable): Likewise.
	(parse_omp_structured_block): Remove ST_ which use parse_omp_do.
	* resolve.c (gfc_resolve_blocks): Add omp loop.
	* st.c (gfc_free_statement): Likewise.
	* trans-openmp.c (gfc_trans_omp_clauses): Handle bind clause.
	(gfc_trans_omp_do, gfc_trans_omp_parallel_do, gfc_trans_omp_distribute,
	gfc_trans_omp_teams, gfc_trans_omp_target, gfc_trans_omp_directive):
	Handle loop directive.
	(gfc_split_omp_clauses): Likewise; fix 

RE: [backport gcc10, gcc9] Requet to backport PR97969

2021-06-02 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

> -Original Message-
> From: Vladimir Makarov 
> Sent: 31 May 2021 16:52
> To: Przemyslaw Wirkus ; Richard Biener
> 
> Cc: gcc-patches@gcc.gnu.org; ja...@redhat.com; ni...@redhat.com;
> Richard Earnshaw ; Ramana Radhakrishnan
> ; Kyrylo Tkachov
> 
> Subject: Re: [backport gcc10, gcc9] Requet to backport PR97969
> 
> 
> On 2021-05-25 5:14 a.m., Przemyslaw Wirkus wrote:
> > Hi,
> > Just a follow up after GCC 11 release.
> >
> > I've backported to gcc-10 branch (without any change to original patches)
> > PR97969 and following PR98722 & PR98777 patches.
> >
> > Commits apply cleanly without changes.
> > Built and regression tested on:
> > * arm-none-eabi and
> > * aarch64-none-linux-gnu cross toolchains.
> >
> > There were no issues and no regressions (all OK).
> >
> > OK for backport to gcc-10 branch ?
> 
> Sorry for delay with the answer due to my vacation.
> 
> As the patches did not introduce new PRs I believe they are ok for gcc-10.

Backported to gcc-10 branch. Thank you for your support.

Kind regards
Przemyslaw

> Thank you.
> 
> >
> > Kind regards,
> > Przemyslaw Wirkus
> >
> > ---
> > commits I've backported:
> >
> > commit cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > Author: Vladimir N. Makarov 
> > Date:   Tue Jan 12 11:26:15 2021 -0500
> >
> >  [PR97969] LRA: Transform pattern `plus (plus (hard reg, const), 
> > pseudo)`
> after elimination
> >
> > commit 4334b524274203125193a08a8485250c41c2daa9
> > Author: Vladimir N. Makarov 
> > Date:   Wed Jan 20 11:40:14 2021 -0500
> >
> >  [PR98722] LRA: Check that target has no 3-op add insn to transform 2
> plus expression.
> >
> > commit 68ba1039c7daf0485b167fe199ed7e8031158091
> > Author: Vladimir N. Makarov 
> > Date:   Thu Jan 21 17:27:01 2021 -0500
> >
> >  [PR98777] LRA: Use preliminary created pseudo for in LRA elimination
> subpass
> >
> > $ ./contrib/git-backport.py cf2ac1c30af0fa783c8d72e527904dda5d8cc330
> > $ ./contrib/git-backport.py 4334b524274203125193a08a8485250c41c2daa9
> > $ ./contrib/git-backport.py 68ba1039c7daf0485b167fe199ed7e8031158091
> >
> >
> >> Richard.



Re: [PATCH] rs6000: Support doubleword swaps removal in rot64 load store [PR100085]

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 03:19:32AM -0500, Xionghu Luo wrote:
> On P8LE, extra rot64+rot64 load or store instructions are generated
> in float128 to vector __int128 conversion.
> 
> This patch teaches pass swaps to also handle such patterns to remove
> extra swap instructions.

Did you check if this is already handled by simplify-rtx if the mode had
been TImode (not V1TImode)?  If not, why do you not handle it there?

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr100085.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile { target { powerpc*-*-linux* } } } */

Just
  /* { dg-do compile } */
please (or is there any reason to do this on linux only?)

> +/* { dg-require-effective-target powerpc_float128_sw_ok } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mfloat128 -mno-regnames" } */

-mfloat128 is implied by -mcpu=power8.  Why do you use -mno-regnames?

> +#ifndef __FLOAT128__
> +#error "-mfloat128 is not supported."
> +#endif

So this can be deleted as well.


Segher


[PATCH 2/2] rs6000: Add test for _mm_minpos_epu16

2021-06-02 Thread Paul A. Clarke via Gcc-patches
Copy the test for _mm_minpos_epu16 from
gcc/testsuite/gcc.target/i386/sse4_1-phminposuw.c, with
a few adjustments:

- Adjust the dejagnu directives for powerpc platform.
- Make the data not be monotonically increasing,
  such that some of the returned values are not
  always the first value (index 0).
- Fix a masking issue where the index was being truncated
  to 2 bits instead of 3 bits, which wasn't found because
  all of the returned indices were 0 with the original
  generated data.
- Support big-endian.

2021-06-02  Paul A. Clarke  

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/sse4_1-phminposuw.c: Copy from
gcc/testsuite/gcc.target/i386, make more robust.
---
 .../gcc.target/powerpc/sse4_1-phminposuw.c| 63 +++
 1 file changed, 63 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-phminposuw.c

diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-phminposuw.c 
b/gcc/testsuite/gcc.target/powerpc/sse4_1-phminposuw.c
new file mode 100644
index ..0b6318500b1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-phminposuw.c
@@ -0,0 +1,63 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include 
+
+#define NUM 64
+
+static void
+TEST (void)
+{
+  union
+{
+  __m128i x[NUM/8];
+  unsigned short s[NUM];
+} src;
+  unsigned short minVal[NUM/8];
+  int minInd[NUM/8];
+  unsigned short minValScalar, minIndScalar;
+  int i, j;
+  union
+{
+  int i;
+  unsigned short s[2];
+} res;
+
+  for (i = 0; i < NUM; i++)
+src.s[i] = i * i - 68 * i + 1200;
+
+  for (i = 0, j = 0; i < NUM; i += 8, j++)
+{
+  res.i = _mm_cvtsi128_si32 (_mm_minpos_epu16 (src.x [i/8]));
+  minVal[j] = res.s[0];
+  minInd[j] = res.s[1] & 0b111;
+}
+
+  for (i = 0; i < NUM; i += 8)
+{
+  minValScalar = src.s[i];
+  minIndScalar = 0;
+
+  for (j = i + 1; j < i + 8; j++)
+   if (minValScalar > src.s[j])
+ {
+   minValScalar = src.s[j];
+   minIndScalar = j - i;
+ }
+
+  if (minValScalar != minVal[i/8] && minIndScalar != minInd[i/8])
+   abort ();
+}
+}
-- 
2.27.0



[PATCH 1/2] rs6000: Add support for _mm_minpos_epu16

2021-06-02 Thread Paul A. Clarke via Gcc-patches
Add a naive implementation of the subject x86 intrinsic to
ease porting.

2021-06-02  Paul A. Clarke  

gcc/ChangeLog:
* config/rs6000/smmintrin.h (_mm_minpos_epu16): New.
---
 gcc/config/rs6000/smmintrin.h | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index bdf6eb365d88..358a48958192 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -116,4 +116,31 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
   return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
 }
 
+/* Return horizontal packed word minimum and its index in bits [15:0]
+   and bits [18:16] respectively.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm_minpos_epu16 (__m128i __A)
+{
+  union __u
+{
+  __m128i __m;
+  __v8hu __uh;
+};
+  union __u __u = { .__m = __A }, __r = { .__m = {0} };
+  unsigned short __ridx = 0;
+  unsigned short __rmin = __u.__uh[__ridx];
+  for (unsigned long __i = __ridx+1;
+   __i < sizeof (__u.__uh) / sizeof (__u.__uh[0]);
+   __i++)
+{
+  if (__u.__uh[__i] < __rmin)
+{
+  __rmin = __u.__uh[__i];
+  __ridx = __i;
+}
+}
+  __r.__uh[0] = __rmin;
+  __r.__uh[1] = __ridx;
+  return __r.__m;
+}
 #endif
-- 
2.27.0



[PATCH 0/2] rs6000: Add support for _mm_minpos_epu16

2021-06-02 Thread Paul A. Clarke via Gcc-patches
Added compatible implementation of _mm_minpos_epu16 for powerpc.
Copied, improved, and fixed testcase from i386.
Tested on BE, LE (32 and 64bit).

Paul A. Clarke (2):
  rs6000: Add support for _mm_minpos_epu16
  rs6000: Add test for _mm_minpos_epu16

 gcc/config/rs6000/smmintrin.h | 27 
 .../gcc.target/powerpc/sse4_1-phminposuw.c| 63 +++
 2 files changed, 90 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-phminposuw.c

-- 
2.27.0



Re: [PATCH] teach compute_objsize about placement new (PR 100876)

2021-06-02 Thread Marek Polacek via Gcc-patches
On Wed, Jun 02, 2021 at 03:40:49PM -0600, Martin Sebor via Gcc-patches wrote:
> +  if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
> +{
> +  /* See if this is a call to placement new.  */
> +  if (!fn
> +   || !DECL_IS_OPERATOR_NEW_P (fn)
> +   || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fn))
> + return NULL_TREE;
> +
> +  tree fname = DECL_ASSEMBLER_NAME (fn);
> +  const char *name = IDENTIFIER_POINTER (fname);
> +  if (strcmp (name, "_ZnwmPv")   // ordinary form
> +   && strcmp (name, "_ZnamPv"))   // array form
> + return NULL_TREE;

Not a review, but you can use id_equal here and simplify things.

Marek



[PATCH] teach compute_objsize about placement new (PR 100876)

2021-06-02 Thread Martin Sebor via Gcc-patches

The two forms of placement operator new defined in <new> return their
pointer argument and may not be displaced by user-defined functions.
But because they are ordinary (not built-in) functions this property
isn't reflected in their declarations alone, and there's no user-
level attribute to annotate them with.  When they are inlined
the property is transparent in the IL but when they are not (without
inlining such as -O0), calls to the operators appear in the IL and
cause -Wmismatched-new-delete to try to match them with the functions
called to deallocate memory.  When the pointer to the memory was
obtained from a function that matches the deallocator but not
the placement new, the warning falsely triggers.

The attached patch solves this by detecting calls to placement new
and treating them the same as those to other pass-through calls (such
as memset).  In addition, it also teaches -Wfree-nonheap-object about
placement delete, for a similar reason as above.  Finally, it also
adds a test for attribute fn spec indicating a function returns its
argument.  It's not necessary for the fix (I had initially thought
placement new might have the attribute) but it seems appropriate
to check.

Tested on x86_64-linux.

Martin
PR c++/100876 - -Wmismatched-new-delete should understand placement new when it's not inlined

gcc/ChangeLog:

	PR c++/100876
	* builtins.c (gimple_call_return_array): Check for attribute fn spec.
	Handle calls to placement new.
	(ndecl_dealloc_argno): Avoid placement delete.

gcc/testsuite/ChangeLog:

	PR c++/100876
	* g++.dg/warn/Wmismatched-new-delete-4.C: New test.
	* g++.dg/warn/Wmismatched-new-delete-5.C: New test.
	* g++.dg/warn/Wstringop-overflow-7.C: New test.
	* g++.dg/warn/Wfree-nonheap-object-6.C: New test.
	* g++.dg/analyzer/placement-new.C: Prune out expected warning.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index af1fe49bb48..fb0717a0248 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5159,11 +5159,43 @@ static tree
 gimple_call_return_array (gimple *stmt, offset_int offrng[2],
 			  range_query *rvals)
 {
-  if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
-  || gimple_call_num_args (stmt) < 1)
+  {
+/* Check for attribute fn spec to see if the function returns one
+   of its arguments.  */
+attr_fnspec fnspec = gimple_call_fnspec (as_a (stmt));
+unsigned int argno;
+if (fnspec.returns_arg ())
+  {
+	offrng[0] = offrng[1] = 0;
+	return gimple_call_arg (stmt, argno);
+  }
+  }
+
+  if (gimple_call_num_args (stmt) < 1)
 return NULL_TREE;
 
   tree fn = gimple_call_fndecl (stmt);
+  if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+{
+  /* See if this is a call to placement new.  */
+  if (!fn
+	  || !DECL_IS_OPERATOR_NEW_P (fn)
+	  || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fn))
+	return NULL_TREE;
+
+  tree fname = DECL_ASSEMBLER_NAME (fn);
+  const char *name = IDENTIFIER_POINTER (fname);
+  if (strcmp (name, "_ZnwmPv")   // ordinary form
+	  && strcmp (name, "_ZnamPv"))   // array form
+	return NULL_TREE;
+
+  if (gimple_call_num_args (stmt) != 2)
+	return NULL_TREE;
+
+  offrng[0] = offrng[1] = 0;
+  return gimple_call_arg (stmt, 1);
+}
+
   switch (DECL_FUNCTION_CODE (fn))
 {
 case BUILT_IN_MEMCPY:
@@ -13285,7 +13317,18 @@ fndecl_dealloc_argno (tree fndecl)
 {
   /* A call to operator delete isn't recognized as one to a built-in.  */
   if (DECL_IS_OPERATOR_DELETE_P (fndecl))
-return 0;
+{
+  if (DECL_IS_REPLACEABLE_OPERATOR (fndecl))
+	return 0;
+
+  /* Avoid placement delete that's not been inlined.  */
+  tree fname = DECL_ASSEMBLER_NAME (fndecl);
+  const char *name = IDENTIFIER_POINTER (fname);
+  if (strcmp (name, "_ZdlPvS_") == 0   // ordinary form
+	  || strcmp (name, "_ZdaPvS_") == 0)   // array form
+	return UINT_MAX;
+  return 0;
+}
 
   /* TODO: Handle user-defined functions with attribute malloc?  Handle
  known non-built-ins like fopen?  */
diff --git a/gcc/testsuite/g++.dg/analyzer/placement-new.C b/gcc/testsuite/g++.dg/analyzer/placement-new.C
index 8250f45b9d9..b648a428247 100644
--- a/gcc/testsuite/g++.dg/analyzer/placement-new.C
+++ b/gcc/testsuite/g++.dg/analyzer/placement-new.C
@@ -24,3 +24,5 @@ void test_3 (void)
   int *p = new(buf) int (42);
   delete p; // { dg-warning "memory not on the heap" }
 }
+
+// { dg-prune-output "-Wfree-nonheap-object" }
diff --git a/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object-6.C b/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object-6.C
new file mode 100644
index 000..83b6ff9157c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wfree-nonheap-object-6.C
@@ -0,0 +1,45 @@
+/* { dg-do compile }
+   { dg-options "-O0 -Wall" } */
+
+#if __cplusplus < 201103L
+# define noexcept throw ()
+#endif
+
+void* operator new (__SIZE_TYPE__, void* __p) noexcept;
+void operator delete (void*, void*);
+
+void* operator new[] (__SIZE_TYPE__, void* __p) noexcept;
+void operator delete[] (void*, void*) 

Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 4:56 PM, Patrick Palka wrote:

On Wed, 2 Jun 2021, Patrick Palka wrote:


On Wed, 2 Jun 2021, Jason Merrill wrote:


On 6/2/21 2:39 PM, Patrick Palka wrote:

Here, the dependent template name in the return type of f() resolves to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from cp_build_qualified_type
rather than just silently dropping the qualification as per [dcl.ref]/1.


Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the alias?


Ah, I hadn't considered investigating that.  It seems make_typename_type
always returns a _TYPE instead of a TYPE_DECL when resolving a dependent
name that's a template-id, regardless of the tf_keep_type_decl flag.
This can be easily fixed like so, and this change alone is sufficient to
fix the PR (no changes to qualttp20.C needed).  Note that this change
should only have an effect when tf_keep_type_decl is passed to
make_typename_type, and the only such caller is the TYPENAME_TYPE case
of tsubst in question, so this change seems pretty safe.

The downside is that we don't get the __restrict__-dropping
"improvement" as exhibited by qualttp20.C that the original patch
provides, so this other approach is more conservative in that sense.

So shall we go with the original patch, or something like the following?
(If we go with the original patch, it just occurred to me that we could
remove tf_keep_type_decl altogether.)  Testing in progress.


For sake of concreteness, here's the full alternative patch for
consideration (modulo ChangeLog):


This seems better.  I think the only non-type return from 
lookup_template_class is error_mark_node; does it work to check that 
specifically rather than !TYPE_P?



-- >8 --

---
  gcc/cp/decl.c  | 13 +
  gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
  2 files changed, 22 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index fb21a3a1ae8..1be232af483 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4136,10 +4136,15 @@ make_typename_type (tree context, tree name, enum 
tag_types tag_type,
  return error_mark_node;
  
if (want_template)

-return lookup_template_class (t, TREE_OPERAND (fullname, 1),
- NULL_TREE, context,
- /*entering_scope=*/0,
- complain | tf_user);
+{
+  t = lookup_template_class (t, TREE_OPERAND (fullname, 1),
+NULL_TREE, context,
+/*entering_scope=*/0,
+complain | tf_user);
+  if (!TYPE_P (t))
+   return t;
+  t = TYPE_NAME (t);
+}

if (DECL_ARTIFICIAL (t) || !(complain & tf_keep_type_decl))

  t = TREE_TYPE (t);
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = decltype(f());
+using type = int&;





[PATCH][RFC] Sparse on entry cache for Ranger.

2021-06-02 Thread Andrew MacLeod via Gcc-patches
As mentioned earlier, I abstracted the on-entry cache at the beginning 
of stage1. This was to make it easier to port future changes back to 
GCC11 so we could provide alternate representations to deal with memory 
issues, or what have you.


This patch introduces a sparse representation of the cache which is 
used  when the number of basic blocks gets too large.


I commandeered the bitmap code since it's efficient and has been working 
a long time, and added 2 routines to get and set 4 bits (quads) at a 
time.  This allows me to use a bitmap like it's a sparse array which can 
contain a value between 0 and 15, and is conveniently pre-initialized to 
values of 0 at no cost :-)   This is then used as an index into a small 
local table to store ranges for the name.  It's limiting in that an 
ssa-name will not be able to have more than 15 unique ranges, but this 
happens in less than 8% of all cases in the data I collected, and most 
of those are switches.  Any ranges after the 15 slots are full revert to 
VARYING.  The values for VARYING and UNDEFINED are pre-populated, and 
for pointers, I also pre-populate [0,0] and ~[0, 0].


This also adds --param=evrp-sparse-threshold=  which allows the 
threshold between the full vector and this new sparse representation to 
be changed. It defaults to a value of 800. I've done various performance 
runs, and this seems to be a reasonably balanced number. In fact it's a 
28% improvement in EVRP compile time over 390 files from a gcc bootstrap 
with minimal impact on missed opportunities.


I've also tried to see if using less than 15 values has any significant 
effect (The lookup is linear when setting), but it does not appear to.


I've also bootstrapped with the sparse threshold at 0 to ensure there 
aren't any issues.


My thoughts are I would put this into trunk, and assuming nothing comes 
up over the next couple of days, port it back to GCC11 to resolve 
100299 and other excessive memory consumption PRs there as well. Given 
that it's reusing bitmap code for the sparse representation, it seems 
like it would be low risk.


Are we OK with the addition of the bitmap_get_quad and bitmap_set_quad 
routines in bitmap.c?  It seems like they might be useful to others.  
They are simple tweaks of bitmap_set_bit and bitmap_bit_p, just dealing 
with 4 bits at a time.  I could make them local if this is a problem, 
but I don't have access to the bitmap internals there.


Bootstraps on x86_64-pc-linux-gnu with no regressions.

Andrew

PS in PR100299 we spend a fraction of a second in EVRP now.



>From fb2d9360b0f347bf8af651e9e3382ceca9769787 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Wed, 2 Jun 2021 15:08:10 -0400
Subject: [PATCH 2/3] Implement "quad" bit accessors in bitmap for sparse
 arrays of 4 bit values.

	* bitmap.c (bitmap_set_quad): New.
	(bitmap_get_quad): New.
	(test_quad): New.
	(test_quad): Call test_quad.
	* bitmap.h (bitmap_set_quad, bitmap_get_quad): New prototypes.
---
 gcc/bitmap.c | 87 
 gcc/bitmap.h |  7 +
 2 files changed, 94 insertions(+)

diff --git a/gcc/bitmap.c b/gcc/bitmap.c
index 5a650cdfc1d..0babcc7cb30 100644
--- a/gcc/bitmap.c
+++ b/gcc/bitmap.c
@@ -1004,6 +1004,68 @@ bitmap_bit_p (const_bitmap head, int bit)
   return (ptr->bits[word_num] >> bit_num) & 1;
 }
 
+/* Set 4 bits at a time in a bitmap.
+   store QUAD_VALUE at bits QUAD*4 through QUAD*4 + 3 in bitmap HEAD.
+   This is the set routine for viewing bitmap as a 4 bit sparse array.  */
+
+void
+bitmap_set_quad (bitmap head, int quad, int quad_value)
+{
+  int bit = quad * 4;
+  gcc_checking_assert (quad_value >= 0 && quad_value <= 0x0F);
+  unsigned indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  bitmap_element *ptr;
+  if (!head->tree_form)
+ptr = bitmap_list_find_element (head, indx);
+  else
+ptr = bitmap_tree_find_element (head, indx);
+  unsigned word_num = bit / BITMAP_WORD_BITS % BITMAP_ELEMENT_WORDS;
+  unsigned bit_num  = bit % BITMAP_WORD_BITS;
+  BITMAP_WORD bit_val = ((BITMAP_WORD) quad_value) << bit_num;
+  BITMAP_WORD mask = ~(((BITMAP_WORD) 0x0F) << bit_num);
+
+  if (ptr != 0)
+{
+  ptr->bits[word_num] &= mask;
+  ptr->bits[word_num] |= bit_val;
+  return;
+}
+
+  ptr = bitmap_element_allocate (head);
+  ptr->indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  ptr->bits[word_num] = bit_val;
+  if (!head->tree_form)
+bitmap_list_link_element (head, ptr);
+  else
+bitmap_tree_link_element (head, ptr);
+}
+
+/* Return a set of 4 consecutive bits starting at bit QUAD * 4.
+   This is the get routine for viewing bitmap as a 4 bit sparse array.  */
+
+int
+bitmap_get_quad (const_bitmap head, int quad)
+{
+  int bit = quad * 4;
+  unsigned int indx = bit / BITMAP_ELEMENT_ALL_BITS;
+  const bitmap_element *ptr;
+  unsigned bit_num;
+  unsigned word_num;
+
+  if (!head->tree_form)
+ptr = bitmap_list_find_element (const_cast (head), indx);
+  else
+ptr = bitmap_tree_find_element (const_cast 

Re: [PATCH v2] REE: PR rtl-optimization/100264: Handle more PARALLEL SET expressions

2021-06-02 Thread Jim Wilson
On Mon, May 10, 2021 at 5:39 AM Christoph Muellner 
wrote:

> gcc/ChangeLog:
> PR rtl-optimization/100264
> * ree.c (get_sub_rtx): Ignore SET expressions without register
> destinations and remove assertion, as it is not valid anymore
> with this new behaviour.
> (merge_def_and_ext): Eliminate destination check for register
> as such SET expressions can't occur anymore.
> (combine_reaching_defs): Likewise.
>

The revised patch looks OK to me, and passed my testing.  I pushed it.

Jim


Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Patrick Palka via Gcc-patches
On Wed, 2 Jun 2021, Patrick Palka wrote:

> On Wed, 2 Jun 2021, Jason Merrill wrote:
> 
> > On 6/2/21 2:39 PM, Patrick Palka wrote:
> > > Here, the dependent template name in the return type of f() resolves to
> > > an alias of int& after substitution, and we end up complaining about
> > > qualifying this reference type with 'const' from cp_build_qualified_type
> > > rather than just silently dropping the qualification as per [dcl.ref]/1.
> > 
> > Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the 
> > alias?
> 
> Ah, I hadn't considered investigating that.  It seems make_typename_type
> always returns a _TYPE instead of a TYPE_DECL when resolving a dependent
> name that's a template-id, regardless of the tf_keep_type_decl flag.
> This can be easily fixed like so, and this change alone is sufficient to
> fix the PR (no changes to qualttp20.C needed).  Note that this change
> should only have an effect when tf_keep_type_decl is passed to
> make_typename_type, and the only such caller is the TYPENAME_TYPE case
> of tsubst in question, so this change seems pretty safe.
> 
> The downside is that we don't get the __restrict__-dropping
> "improvement" as exhibited by qualttp20.C that the original patch
> provides, so this other approach is more conservative in that sense.
> 
> So shall we go with the original patch, or something like the following?
> (If we go with the original patch, it just occurred to me that we could
> remove tf_keep_type_decl altogether.)  Testing in progress.

For sake of concreteness, here's the full alternative patch for
consideration (modulo ChangeLog):

-- >8 --

---
 gcc/cp/decl.c  | 13 +
 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
 2 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index fb21a3a1ae8..1be232af483 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4136,10 +4136,15 @@ make_typename_type (tree context, tree name, enum 
tag_types tag_type,
 return error_mark_node;
 
   if (want_template)
-return lookup_template_class (t, TREE_OPERAND (fullname, 1),
- NULL_TREE, context,
- /*entering_scope=*/0,
- complain | tf_user);
+{
+  t = lookup_template_class (t, TREE_OPERAND (fullname, 1),
+NULL_TREE, context,
+/*entering_scope=*/0,
+complain | tf_user);
+  if (!TYPE_P (t))
+   return t;
+  t = TYPE_NAME (t);
+}
   
   if (DECL_ARTIFICIAL (t) || !(complain & tf_keep_type_decl))
 t = TREE_TYPE (t);
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = decltype(f());
+using type = int&;
-- 
2.32.0.rc2



Re: [EXTERNAL] Re: [PATCH] tree-optimization: Optimize division followed by multiply [PR95176]

2021-06-02 Thread Victor Tong via Gcc-patches
Hi Richard,

Thanks for reviewing my patch. I did a search online and you're right -- there 
isn't a vector modulo instruction. I'll remove the X * (Y / X) --> Y - (Y % X) 
pattern and the existing X - (X / Y) * Y --> X % Y from triggering on vector 
types.

I looked into why the following pattern isn't triggering:

  (simplify
   (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
   (view_convert @1))

The nop_converts expand into tree_nop_conversion_p checks. In fn2() of the 
testsuite/gcc.dg/fold-minus-6.c, the expression during generic matching looks 
like: 

42 - (long int) (42 - 42 % x)

When looking at the right-hand side of the expression (the (long int) (42 - 42 
% x)), the tree_nop_conversion_p check fails because of the type precision 
difference. The expression inside of the cast has a 32-bit precision and the 
outer expression has a 64-bit precision.

I looked around at other patterns and it seems like nop_convert and 
view_convert are used because of underflow/overflow concerns. I'm not familiar 
with the two constructs. What's the difference between using them and checking 
TYPE_OVERFLOW_UNDEFINED? In the scenario above, since TYPE_OVERFLOW_UNDEFINED 
is true, the second pattern that I added (X - (X - Y) --> Y) gets triggered.

Thanks,
Victor


From: Richard Biener 
Sent: Tuesday, April 27, 2021 1:29 AM
To: Victor Tong 
Cc: gcc-patches@gcc.gnu.org 
Subject: [EXTERNAL] Re: [PATCH] tree-optimization: Optimize division followed 
by multiply [PR95176] 
 
On Thu, Apr 1, 2021 at 1:03 AM Victor Tong via Gcc-patches
 wrote:
>
> Hello,
>
> This patch fixes PR tree-optimization/95176. A new pattern in match.pd was 
> added to transform "a * (b / a)" --> "b - (b % a)". A new test case was also 
> added to cover this scenario.
>
> The new pattern interfered with the existing pattern of "X - (X / Y) * Y". In 
> some cases (such as in fn4() in gcc/testsuite/gcc.dg/fold-minus-6.c), the new 
> pattern is applied causing the existing pattern to no longer apply. This 
> results in worse code generation because the expression is left as "X - (X - 
> Y)". An additional subtraction pattern of "X - (X - Y) --> Y" was added to 
> this patch to avoid this regression.
>
> I also didn't remove the existing pattern because it triggered in more cases 
> than the new pattern because of a tree_invariant_p check that's inserted by 
> genmatch for the new pattern.

Yes, we do not handle using Y multiple times when it might contain
side-effects in GENERIC folding
(comments in genmatch suggest we can use save_expr but we don't
implement this [anymore]).

On GIMPLE there's also the issue that your new pattern creates a
complex expression which
makes it fail to be used by value-numbering for example where the
old pattern was OK
(eventually, if no conversion was required).

So indeed it looks OK to preserve both.

I wonder why you needed the

+/* X - (X - Y) --> Y */
+(simplify
+ (minus (convert1? @0) (convert2? (minus @@0 @1)))
+ (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) &&
TYPE_OVERFLOW_UNDEFINED(type))
+  (convert @1)))

pattern since it should be handled by

  /* Match patterns that allow contracting a plus-minus pair
 irrespective of overflow issues.  */
  /* (A +- B) - A   ->  +- B */
  /* (A +- B) -+ B  ->  A */
  /* A - (A +- B)   -> -+ B */
  /* A +- (B -+ A)  ->  +- B */

in particular

  (simplify
   (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
   (view_convert @1))

if there's supported cases missing I'd rather extend this pattern than
replicating it.

+/* X * (Y / X) is the same as Y - (Y % X).  */
+(simplify
+ (mult:c (convert1? @0) (convert2? (trunc_div @1 @@0)))
+ (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+  (minus (convert @1) (convert (trunc_mod @1 @0)

note that if you're allowing vector types you have to use
(view_convert ...) in the
transform and you also need to make sure that the target can expand
the modulo - I suspect that's an issue with the existing pattern as well.
I don't know of any vector ISA that supports modulo (or integer
division, that is).
Restricting the patterns to integer types is probably the most
sensible solution.

Thanks,
Richard.

> I verified that all "make -k check" tests pass when targeting 
> x86_64-pc-linux-gnu.
>
> 2021-03-31  Victor Tong  
>
> gcc/ChangeLog:
>
> * match.pd: Two new patterns: One to optimize division followed by 
>multiply and the other to avoid a regression as explained above
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/20030807-10.c: Update existing test to look for a 
>subtraction because a shift is no longer emitted
> * gcc.dg/pr95176.c: New test to cover optimizing division followed by 
>multiply
>
> I don't have write access to the GCC repo but I've completed the FSF 
> paperwork as I plan to make more contributions in the future. I'm looking for 
> a sponsorship from an existing GCC maintainer before applying for write 
> access.
>
> Thanks,
> 

Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Patrick Palka via Gcc-patches
On Wed, 2 Jun 2021, Jason Merrill wrote:

> On 6/2/21 2:39 PM, Patrick Palka wrote:
> > Here, the dependent template name in the return type of f() resolves to
> > an alias of int& after substitution, and we end up complaining about
> > qualifying this reference type with 'const' from cp_build_qualified_type
> > rather than just silently dropping the qualification as per [dcl.ref]/1.
> 
> Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the alias?

Ah, I hadn't considered investigating that.  It seems make_typename_type
always returns a _TYPE instead of a TYPE_DECL when resolving a dependent
name that's a template-id, regardless of the tf_keep_type_decl flag.
This can be easily fixed like so, and this change alone is sufficient to
fix the PR (no changes to qualttp20.C needed).  Note that this change
should only have an effect when tf_keep_type_decl is passed to
make_typename_type, and the only such caller is the TYPENAME_TYPE case
of tsubst in question, so this change seems pretty safe.

The downside is that we don't get the __restrict__-dropping
"improvement" as exhibited by qualttp20.C that the original patch
provides, so this other approach is more conservative in that sense.

So shall we go with the original patch, or something like the following?
(If we go with the original patch, it just occurred to me that we could
remove tf_keep_type_decl altogether.)  Testing in progress.

-- >8 --

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index fb21a3a1ae8..1be232af483 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4136,10 +4136,15 @@ make_typename_type (tree context, tree name, enum 
tag_types tag_type,
 return error_mark_node;

   if (want_template)
-return lookup_template_class (t, TREE_OPERAND (fullname, 1),
- NULL_TREE, context,
- /*entering_scope=*/0,
- complain | tf_user);
+{
+  t = lookup_template_class (t, TREE_OPERAND (fullname, 1),
+NULL_TREE, context,
+/*entering_scope=*/0,
+complain | tf_user);
+  if (!TYPE_P (t))
+   return t;
+  t = TYPE_NAME (t);
+}

   if (DECL_ARTIFICIAL (t) || !(complain & tf_keep_type_decl))
 t = TREE_TYPE (t);



Re: [PATCH] Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).

2021-06-02 Thread Segher Boessenkool
Hi!

On Wed, Jun 02, 2021 at 09:07:35AM +0200, Richard Biener wrote:
> On Wed, Jun 2, 2021 at 7:41 AM liuhongt via Gcc-patches
>  wrote:
> > For i386, it will enable below opt
> >
> > from
> > notl    %edi
> > vpbroadcastd    %edi, %xmm0
> > vpand   %xmm1, %xmm0, %xmm0
> > to
> > vpbroadcastd    %edi, %xmm0
> > vpandn   %xmm1, %xmm0, %xmm0
> 
> There will be cases where (vec_duplicate (not A)) is better
> than (not (vec_duplicate A)), so I'm not sure it is a good idea
> to forcefully canonicalize unary operations.

It is two unaries in sequence, where the order does not matter either.
As in all such cases you either have to handle both cases everywhere, or
have a canonical order.

> I suppose the
> simplification happens inside combine

combine uses simplify-rtx for most cases (it is part of combine, but
used in quite a few other places these days).

> - doesn't combine
> already have code to try variants of an expression and isn't
> this a good candidate that can be added there, avoiding
> the canonicalization?

As I mentioned, this is done in simplify-rtx in cases that do not have a
canonical representation.  This is critical because it prevents loops.

A very typical example is how UMIN is optimised:

   case UMIN:
  if (trueop1 == CONST0_RTX (mode) && ! side_effects_p (op0))
return op1;
  if (rtx_equal_p (trueop0, trueop1) && ! side_effects_p (op0))
return op0;
  tem = simplify_associative_operation (code, mode, op0, op1);
  if (tem)
return tem;
  break;

(the stuff using "tem").

Hongtao, can we do something similar here?  Does that work well?  Please
try it out :-)


Segher


Re: GCC documentation: porting to Sphinx

2021-06-02 Thread Martin Sebor via Gcc-patches

On 5/31/21 7:25 AM, Martin Liška wrote:

Hello.

I've made quite some progress with the porting of the documentation and
I would like to present it to the community now:
https://splichal.eu/scripts/sphinx/


Just a few issues I noticed in the warnings section:

The headings of some warnings mention the same option twice (e.g.,
-Wabi, -Wabi, -Wno-abi;  -Wdouble-promotion, -Wdouble-promotion,
-Wno-double-promotion;  -Winit-self, -Winit-self, -Wno-init-self).
This looks like a pretty pervasive problem.

Mentioning the -Wno-xxx option is redundant in a heading for -Wxxx.

The headings of some other warnings also mention options that are
only remotely related to them.  E.g., -Wformat has all these:

  -Wformat, -Wno-format, -ffreestanding, -fno-builtin, -Wformat=

(I see the same problem in the attributes section where the headings
for some attributes include option names).

That seems quite puzzling.  I assume it's a consequence of having
index entries for the related options, but I don't think making
them visible in the headings is helpful.

Headings that in the manual today include a level like

  -Wformat-overflow
  -Wformat-overflow=level

don't mention the level in the Sphinx manual:

  -Wformat-overflow, -Wno-format-overflow

When the /level/ is then discussed in the rest of the text it's
not clear what it refers to.

Martin



Note the documentation is automatically ([1]) generated from texinfo 
with a GitHub workflow ([2]).
It's built on the devel/sphinx GCC branch which I periodically merge with the 
master branch. One can see the current source .rst files here: [3].

Changes made since the last time:
- a shared content is factored out ([4])
- conditional build is fully supported (even for shared parts)
- manual pages look reasonably well
- folders are created for files which have >= 5 TOC tree entries
- various formatting issues were resolved
- baseconf.py reads BASE-VER, DEV-PHASE, .. files

I've got a couple of questions:

1) Do we have to use the following cover text?
    Copyright (c) 1988-2020 Free Software Foundation, Inc.

    Permission is granted to copy, distribute and/or modify this 
document under the terms of the GNU Free Documentation License, Version 
1.3 or any later version published by the Free Software Foundation; with 
the Invariant Sections being "GNU General Public
    License" and "Funding Free Software", the Front-Cover texts 
being (a) (see below), and with the Back-Cover Texts being (b) (see 
below).  A copy of the license is included in the gfdl(7) man page.


    (a) The FSF's Front-Cover Text is:

     A GNU Manual

    (b) The FSF's Back-Cover Text is:

     You have freedom to copy and modify this GNU Manual, like GNU
     software.  Copies published by the Free Software Foundation raise
     funds for GNU development.

2) Do we want to generate fsf-funding, gpl and gfdl manual pages?
3) Do we want to preserve the current strange copy mechanism for 
./gcc/doc/tm.texi.in ?

4) Do we want a copyright header for the created .rst files?

Thoughts?
Thanks,
Martin

[1] https://github.com/davidmalcolm/texi2rst
[2] https://github.com/davidmalcolm/texi2rst/actions
[3] https://github.com/marxin/texi2rst-generated/tree/master/sphinx
[4] https://github.com/marxin/texi2rst-generated/tree/master/sphinx/share




Re: [PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 2:39 PM, Patrick Palka wrote:

Here, the dependent template name in the return type of f() resolves to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from cp_build_qualified_type
rather than just silently dropping the qualification as per [dcl.ref]/1.


Hmm, the patch looks fine, but why does the TYPE_DECL test fail for the 
alias?



We already have the tf_ignore_bad_quals flag for this situation, but
the TYPENAME_TYPE branch of tsubst for some reason doesn't always use
this flag.  This patch just makes tsubst unconditionally use this flag
when substituting a TYPENAME_TYPE.

This change also causes us to drop bogus __restrict__ qualifiers more
consistently during substitution, as in qualttp20.C below where we no
longer diagnose the __restrict__ qualifier on B1::r.  Note that if
we artificially introduced a typedef as in B1::s we silently dropped
__restrict__ even before this patch, so this seems like an improvement.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/100592

gcc/cp/ChangeLog:

* pt.c (tsubst) : Always pass
tf_ignore_bad_quals to cp_build_qualified_type.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/alias-decl-71.C: New test.
* g++.dg/template/qualttp20.C: Remove dg-error and augment.
---
  gcc/cp/pt.c| 10 --
  gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
  gcc/testsuite/g++.dg/template/qualttp20.C  |  6 --
  3 files changed, 21 insertions(+), 8 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 86259e900e9..2da5407a2a7 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -16066,10 +16066,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
if (f == error_mark_node)
  return f;
if (TREE_CODE (f) == TYPE_DECL)
- {
-   complain |= tf_ignore_bad_quals;
-   f = TREE_TYPE (f);
- }
+ f = TREE_TYPE (f);
  
  	if (TREE_CODE (f) != TYPENAME_TYPE)

  {
@@ -16091,8 +16088,9 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  }
  }
  
-	return cp_build_qualified_type_real

- (f, cp_type_quals (f) | cp_type_quals (t), complain);
+   int quals = cp_type_quals (f) | cp_type_quals (t);
+   complain |= tf_ignore_bad_quals;
+   return cp_build_qualified_type_real (f, quals, complain);
}
  
  case UNBOUND_CLASS_TEMPLATE:

diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = decltype(f());
+using type = int&;
diff --git a/gcc/testsuite/g++.dg/template/qualttp20.C 
b/gcc/testsuite/g++.dg/template/qualttp20.C
index 52989bae538..3281f5d9eab 100644
--- a/gcc/testsuite/g++.dg/template/qualttp20.C
+++ b/gcc/testsuite/g++.dg/template/qualttp20.C
@@ -10,13 +10,15 @@ struct AS
  {
typedef void (myT) ();
struct L {};
+  typedef struct {} M;

  };
  
  
  template  struct B1 : T

  {
-  typedef typename T::L __restrict__ r;// { dg-error "'__restrict__' qualifiers 
cannot" }
+  typedef typename T::L __restrict__ r;
+  typedef typename T::M __restrict__ s;
typedef typename T::myT __restrict__ p;
  
// The following are DR 295 dependent

@@ -32,5 +34,5 @@ template  struct B2 : T
myconst b;
  };
  
-B1 b1;	// { dg-message "required" }

+B1 b1;
  B2 b2;





Re: [PATCH] c++: using-enum and access specifiers [PR100862]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 2:39 PM, Patrick Palka wrote:

When copying the enumerators imported by a class-scope using-enum
declaration, we need to override current_access_specifier so that
finish_member_declaration gives them the same access as the using-enum
decl.  The processing of a using-enum is performed after we've seen the
entire definition of the class, so current_access_specifier at this
point is otherwise set to the last access specifier within the class.

For consistency, this patch makes build_enumerator use
set_current_access_from_decl too.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/11?


OK.


PR c++/100862

gcc/cp/ChangeLog:

* pt.c (set_current_access_from_decl): Move to ...
* class.c (set_current_access_from_decl): ... here.
(handle_using_decl): Use it to propagate the access of the
using-enum decl to the copy of the imported enumerator.
* cp-tree.h (set_current_access_from_decl): Declare.
* decl.c (build_enumerator): Simplify using make_temp_override
and set_current_access_from_decl.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/using-enum-9.C: New test.
---
  gcc/cp/class.c| 15 
  gcc/cp/cp-tree.h  |  1 +
  gcc/cp/decl.c | 12 ++
  gcc/cp/pt.c   | 14 
  gcc/testsuite/g++.dg/cpp2a/using-enum-9.C | 28 +++
  5 files changed, 46 insertions(+), 24 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-9.C

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 354addde773..b53a4dbdd4e 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -207,6 +207,19 @@ static bool type_maybe_constexpr_default_constructor 
(tree);
  static bool type_maybe_constexpr_destructor (tree);
  static bool field_poverlapping_p (tree);
  
+/* Set CURRENT_ACCESS_SPECIFIER based on the protection of DECL.  */

+
+void
+set_current_access_from_decl (tree decl)
+{
+  if (TREE_PRIVATE (decl))
+current_access_specifier = access_private_node;
+  else if (TREE_PROTECTED (decl))
+current_access_specifier = access_protected_node;
+  else
+current_access_specifier = access_public_node;
+}
+
  /* Return a COND_EXPR that executes TRUE_STMT if this execution of the
 'structor is in charge of 'structing virtual bases, or FALSE_STMT
 otherwise.  */
@@ -1359,6 +1372,8 @@ handle_using_decl (tree using_decl, tree t)
 CONST_DECL_USING_P is true.  */
gcc_assert (TREE_CODE (decl) == CONST_DECL);
  
+  auto cas = make_temp_override (current_access_specifier);

+  set_current_access_from_decl (using_decl);
tree copy = copy_decl (decl);
DECL_CONTEXT (copy) = t;
DECL_ARTIFICIAL (copy) = true;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c95a820037f..b1b7e615bcc 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8186,6 +8186,7 @@ struct atom_hasher : default_hash_traits
  extern bool subsumes(tree, tree);
  
  /* In class.c */

+extern void set_current_access_from_decl (tree);
  extern void cp_finish_injected_record_type (tree);
  
  /* in vtable-class-hierarchy.c */

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index e7268d5ad18..fb21a3a1ae8 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -16333,17 +16333,9 @@ incremented enumerator value is too large for 
%"));
  
  	 For which case we need to make sure that the access of `S::i'

 matches the access of `S::E'.  */
-  tree saved_cas = current_access_specifier;
-  if (TREE_PRIVATE (TYPE_NAME (enumtype)))
-   current_access_specifier = access_private_node;
-  else if (TREE_PROTECTED (TYPE_NAME (enumtype)))
-   current_access_specifier = access_protected_node;
-  else
-   current_access_specifier = access_public_node;
-
+  auto cas = make_temp_override (current_access_specifier);
+  set_current_access_from_decl (TYPE_NAME (enumtype));
finish_member_declaration (decl);
-
-  current_access_specifier = saved_cas;
  }
else
  pushdecl (decl);
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 31302803c62..86259e900e9 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -190,7 +190,6 @@ static tree tsubst_arg_types (tree, tree, tree, 
tsubst_flags_t, tree);
  static tree tsubst_function_type (tree, tree, tsubst_flags_t, tree);
  static bool check_specialization_scope (void);
  static tree process_partial_specialization (tree);
-static void set_current_access_from_decl (tree);
  static enum template_base_result get_template_base (tree, tree, tree, tree,
bool , tree *);
  static tree try_class_unification (tree, tree, tree, tree, bool);
@@ -26431,19 +26430,6 @@ tsubst_initializer_list (tree t, tree argvec)
return inits;
  }
  
-/* Set CURRENT_ACCESS_SPECIFIER based on the protection of DECL.  */

-
-static void

[PATCH] wwwdocs: readings: Add PRU documents

2021-06-02 Thread Dimitar Dimitrov
With TI official wiki gone, let's put stable links to their proprietary
toolchain documents, which happen to describe ABI and instruction set.

Signed-off-by: Dimitar Dimitrov 
---
 htdocs/readings.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/htdocs/readings.html b/htdocs/readings.html
index 33ed5822..b3ffbe51 100644
--- a/htdocs/readings.html
+++ b/htdocs/readings.html
@@ -256,6 +256,8 @@ names.
 
  pru
Manufacturer: Texas Instruments
+   https://www.ti.com/lit/pdf/spruij2;>PRU Assembly Instruction 
User Guide.
+   https://www.ti.com/lit/pdf/spruhv7;>TI ABI Specification 
(see chapter 6).
https://elinux.org/Category:PRU;>Community PRU 
Documentation
  
 
-- 
2.31.1



Re: [PATCH] Simplify option handling for -fsanitize-coverage

2021-06-02 Thread Jeff Law via Gcc-patches




On 5/31/2021 4:46 AM, Martin Liška wrote:

PING^1

On 5/20/21 12:43 PM, Martin Liška wrote:

The simplification patch improves option completion and
handling of the option.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

 * common.opt: Use proper Enum values.
 * opts.c (COVERAGE_SANITIZER_OPT): Remove.
 (parse_sanitizer_options): Handle only sanitizer_opts.
 (common_handle_option): Just assign value.

gcc/testsuite/ChangeLog:

 * gcc.dg/spellcheck-options-23.c: New test.

OK
jeff



[pushed] c++: missing dtor with -fno-elide-constructors [PR100838]

2021-06-02 Thread Jason Merrill via Gcc-patches
tf_no_cleanup only applies to the outermost TARGET_EXPR, and we already
clear it for nested calls in build_over_call, but in this case both
constructor calls came from convert_like, so we need to clear it in the
recursive call as well.  This revealed that we were adding an extra
ck_rvalue in direct-initialization cases where it was wrong.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/100838

gcc/cp/ChangeLog:

* call.c (convert_like_internal): Clear tf_no_cleanup when
recursing.
(build_user_type_conversion_1): Only add ck_rvalue if
LOOKUP_ONLYCONVERTING.

gcc/testsuite/ChangeLog:

* g++.dg/init/no-elide2.C: New test.
---
 gcc/cp/call.c |  6 +++--
 gcc/testsuite/g++.dg/init/no-elide2.C | 32 +++
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/init/no-elide2.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 90192b1b8aa..17fc60cd4af 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -4110,7 +4110,7 @@ build_user_type_conversion_1 (tree totype, tree expr, int 
flags,
{
  cand->second_conv = build_identity_conv (totype, NULL_TREE);
 
- /* If totype isn't a reference, and LOOKUP_NO_TEMP_BIND isn't
+ /* If totype isn't a reference, and LOOKUP_ONLYCONVERTING is
 set, then this is copy-initialization.  In that case, "The
 result of the call is then used to direct-initialize the
 object that is the destination of the copy-initialization."
@@ -4119,6 +4119,8 @@ build_user_type_conversion_1 (tree totype, tree expr, int 
flags,
 We represent this in the conversion sequence with an
 rvalue conversion, which means a constructor call.  */
  if (!TYPE_REF_P (totype)
+ && cxx_dialect < cxx17
+ && (flags & LOOKUP_ONLYCONVERTING)
  && !(convflags & LOOKUP_NO_TEMP_BIND))
cand->second_conv
  = build_conv (ck_rvalue, totype, cand->second_conv);
@@ -7800,7 +7802,7 @@ convert_like_internal (conversion *convs, tree expr, tree 
fn, int argnum,
   expr = convert_like (next_conversion (convs), expr, fn, argnum,
   convs->kind == ck_ref_bind
   ? issue_conversion_warnings : false,
-  c_cast_p, complain);
+  c_cast_p, complain & ~tf_no_cleanup);
   if (expr == error_mark_node)
 return error_mark_node;
 
diff --git a/gcc/testsuite/g++.dg/init/no-elide2.C 
b/gcc/testsuite/g++.dg/init/no-elide2.C
new file mode 100644
index 000..9a0ba1936ab
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/no-elide2.C
@@ -0,0 +1,32 @@
+// PR c++/100838
+// { dg-do run }
+// { dg-additional-options -fno-elide-constructors }
+
+extern "C" int puts (const char *);
+
+int c,d;
+class MyString {
+public:
+  MyString(const char* s = "") {
+puts ("ctor");
+++c;
+  }
+  ~MyString() {
+puts ("dtor");
+++d;
+  }
+  MyString(const MyString& s) {
+puts ("copy ctor");
+++c;
+  }
+  MyString& operator=(const MyString& s);
+};
+
+int main() {
+  {
+MyString s1 = "Hello";
+puts ("main");
+  }
+  if (c != d)
+__builtin_abort();
+}

base-commit: 659cc7d6320aae7ab390b5886f0efed22f78e244
-- 
2.27.0



[PATCH] c++: cv-qualified dependent name of alias tmpl [PR100592]

2021-06-02 Thread Patrick Palka via Gcc-patches
Here, the dependent template name in the return type of f() resolves to
an alias of int& after substitution, and we end up complaining about
qualifying this reference type with 'const' from cp_build_qualified_type
rather than just silently dropping the qualification as per [dcl.ref]/1.

We already have the tf_ignore_bad_quals flag for this situation, but
the TYPENAME_TYPE branch of tsubst for some reason doesn't always use
this flag.  This patch just makes tsubst unconditionally use this flag
when substituting a TYPENAME_TYPE.

This change also causes us to drop bogus __restrict__ qualifiers more
consistently during substitution, as in qualttp20.C below where we no
longer diagnose the __restrict__ qualifier on B1::r.  Note that if
we artificially introduced a typedef as in B1::s we silently dropped
__restrict__ even before this patch, so this seems like an improvement.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/100592

gcc/cp/ChangeLog:

* pt.c (tsubst) <case TYPENAME_TYPE>: Always pass
tf_ignore_bad_quals to cp_build_qualified_type.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/alias-decl-71.C: New test.
* g++.dg/template/qualttp20.C: Remove dg-error and augment.
---
 gcc/cp/pt.c| 10 --
 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C | 13 +
 gcc/testsuite/g++.dg/template/qualttp20.C  |  6 --
 3 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 86259e900e9..2da5407a2a7 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -16066,10 +16066,7 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
if (f == error_mark_node)
  return f;
if (TREE_CODE (f) == TYPE_DECL)
- {
-   complain |= tf_ignore_bad_quals;
-   f = TREE_TYPE (f);
- }
+ f = TREE_TYPE (f);
 
if (TREE_CODE (f) != TYPENAME_TYPE)
  {
@@ -16091,8 +16088,9 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  }
  }
 
-   return cp_build_qualified_type_real
- (f, cp_type_quals (f) | cp_type_quals (t), complain);
+   int quals = cp_type_quals (f) | cp_type_quals (t);
+   complain |= tf_ignore_bad_quals;
+   return cp_build_qualified_type_real (f, quals, complain);
   }
 
 case UNBOUND_CLASS_TEMPLATE:
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
new file mode 100644
index 000..6a61f93a0b0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-71.C
@@ -0,0 +1,13 @@
+// PR c++/100592
+// { dg-do compile { target c++11 } }
+
+template
+struct meta {
+  template using if_c = int&;
+};
+
+template
+typename meta::template if_c const f();
+
+using type = decltype(f());
+using type = int&;
diff --git a/gcc/testsuite/g++.dg/template/qualttp20.C 
b/gcc/testsuite/g++.dg/template/qualttp20.C
index 52989bae538..3281f5d9eab 100644
--- a/gcc/testsuite/g++.dg/template/qualttp20.C
+++ b/gcc/testsuite/g++.dg/template/qualttp20.C
@@ -10,13 +10,15 @@ struct AS
 {
   typedef void (myT) ();
   struct L {};
+  typedef struct {} M;
   
 };
 
 
 template  struct B1 : T
 {
-  typedef typename T::L __restrict__ r;// { dg-error "'__restrict__' 
qualifiers cannot" }
+  typedef typename T::L __restrict__ r;
+  typedef typename T::M __restrict__ s;
   typedef typename T::myT __restrict__ p;
 
   // The following are DR 295 dependent
@@ -32,5 +34,5 @@ template  struct B2 : T
   myconst b;
 };
 
-B1 b1; // { dg-message "required" }
+B1 b1;
 B2 b2;
-- 
2.32.0.rc2



[PATCH] c++: using-enum and access specifiers [PR100862]

2021-06-02 Thread Patrick Palka via Gcc-patches
When copying the enumerators imported by a class-scope using-enum
declaration, we need to override current_access_specifier so that
finish_member_declaration gives them the same access as the using-enum
decl.  The processing of a using-enum is performed after we've seen the
entire definition of the class, so current_access_specifier at this
point is otherwise set to the last access specifier within the class.

For consistency, this patch makes build_enumerator use
set_current_access_from_decl too.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/11?

PR c++/100862

gcc/cp/ChangeLog:

* pt.c (set_current_access_from_decl): Move to ...
* class.c (set_current_access_from_decl): ... here.
(handle_using_decl): Use it to propagate the access of the
using-enum decl to the copy of the imported enumerator.
* cp-tree.h (set_current_access_from_decl): Declare.
* decl.c (build_enumerator): Simplify using make_temp_override
and set_current_access_from_decl.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/using-enum-9.C: New test.
---
 gcc/cp/class.c| 15 
 gcc/cp/cp-tree.h  |  1 +
 gcc/cp/decl.c | 12 ++
 gcc/cp/pt.c   | 14 
 gcc/testsuite/g++.dg/cpp2a/using-enum-9.C | 28 +++
 5 files changed, 46 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/using-enum-9.C

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 354addde773..b53a4dbdd4e 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -207,6 +207,19 @@ static bool type_maybe_constexpr_default_constructor 
(tree);
 static bool type_maybe_constexpr_destructor (tree);
 static bool field_poverlapping_p (tree);
 
+/* Set CURRENT_ACCESS_SPECIFIER based on the protection of DECL.  */
+
+void
+set_current_access_from_decl (tree decl)
+{
+  if (TREE_PRIVATE (decl))
+current_access_specifier = access_private_node;
+  else if (TREE_PROTECTED (decl))
+current_access_specifier = access_protected_node;
+  else
+current_access_specifier = access_public_node;
+}
+
 /* Return a COND_EXPR that executes TRUE_STMT if this execution of the
'structor is in charge of 'structing virtual bases, or FALSE_STMT
otherwise.  */
@@ -1359,6 +1372,8 @@ handle_using_decl (tree using_decl, tree t)
 CONST_DECL_USING_P is true.  */
   gcc_assert (TREE_CODE (decl) == CONST_DECL);
 
+  auto cas = make_temp_override (current_access_specifier);
+  set_current_access_from_decl (using_decl);
   tree copy = copy_decl (decl);
   DECL_CONTEXT (copy) = t;
   DECL_ARTIFICIAL (copy) = true;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c95a820037f..b1b7e615bcc 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8186,6 +8186,7 @@ struct atom_hasher : default_hash_traits
 extern bool subsumes(tree, tree);
 
 /* In class.c */
+extern void set_current_access_from_decl (tree);
 extern void cp_finish_injected_record_type (tree);
 
 /* in vtable-class-hierarchy.c */
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index e7268d5ad18..fb21a3a1ae8 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -16333,17 +16333,9 @@ incremented enumerator value is too large for 
%"));
 
 For which case we need to make sure that the access of `S::i'
 matches the access of `S::E'.  */
-  tree saved_cas = current_access_specifier;
-  if (TREE_PRIVATE (TYPE_NAME (enumtype)))
-   current_access_specifier = access_private_node;
-  else if (TREE_PROTECTED (TYPE_NAME (enumtype)))
-   current_access_specifier = access_protected_node;
-  else
-   current_access_specifier = access_public_node;
-
+  auto cas = make_temp_override (current_access_specifier);
+  set_current_access_from_decl (TYPE_NAME (enumtype));
   finish_member_declaration (decl);
-
-  current_access_specifier = saved_cas;
 }
   else
 pushdecl (decl);
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 31302803c62..86259e900e9 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -190,7 +190,6 @@ static tree tsubst_arg_types (tree, tree, tree, 
tsubst_flags_t, tree);
 static tree tsubst_function_type (tree, tree, tsubst_flags_t, tree);
 static bool check_specialization_scope (void);
 static tree process_partial_specialization (tree);
-static void set_current_access_from_decl (tree);
 static enum template_base_result get_template_base (tree, tree, tree, tree,
bool , tree *);
 static tree try_class_unification (tree, tree, tree, tree, bool);
@@ -26431,19 +26430,6 @@ tsubst_initializer_list (tree t, tree argvec)
   return inits;
 }
 
-/* Set CURRENT_ACCESS_SPECIFIER based on the protection of DECL.  */
-
-static void
-set_current_access_from_decl (tree decl)
-{
-  if (TREE_PRIVATE (decl))
-current_access_specifier = 

Re: [PATCH] arm: Auto-vectorization for MVE: vabs

2021-06-02 Thread Richard Sandiford via Gcc-patches
Christophe Lyon  writes:
> This patch adds support for auto-vectorization of absolute value
> computation using vabs.
>
> We use a similar pattern to what is used in neon.md and extend the
> existing neg<mode>2 expander to match both 'neg' and 'abs'.  This
> implies renaming the existing abs<mode>2 define_insn in neon.md to
> avoid a clash with the new expander with the same name.
>
> 2021-05-26  Christophe Lyon  
>
>   gcc/
>   * config/arm/mve.md (mve_vabsq_f): Use 'abs' instead of unspec.
>   (mve_vabsq_s): Likewise.
>   * config/arm/neon.md (abs2): Rename to neon_abs2.
>   * config/arm/unspecs.md (VABSQ_F, VABSQ_S): Delete.
>   * config/arm/vec-common.md (neg2): Rename to
>   2.
>
>   gcc/testsuite/
>   * gcc.target/arm/simd/mve-vabs.c: New test.

OK, thanks.

Richard

> ---
>  gcc/config/arm/mve.md|  6 +--
>  gcc/config/arm/neon.md   |  2 +-
>  gcc/config/arm/unspecs.md|  2 -
>  gcc/config/arm/vec-common.md |  4 +-
>  gcc/testsuite/gcc.target/arm/simd/mve-vabs.c | 44 
>  5 files changed, 49 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vabs.c
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 0a6ba80c99d..0bfa6a91d55 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -269,8 +269,7 @@ (define_insn "mve_vdupq_n_f"
>  (define_insn "mve_vabsq_f"
>[
> (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")]
> -  VABSQ_F))
> + (abs:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")))
>]
>"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
>"vabs.f%#  %q0, %q1"
> @@ -481,8 +480,7 @@ (define_insn "@mve_vaddvq_"
>  (define_insn "mve_vabsq_s"
>[
> (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
> -  VABSQ_S))
> + (abs:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
>]
>"TARGET_HAVE_MVE"
>"vabs.s%#\t%q0, %q1"
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 6a6573317cf..077c62ffd20 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -739,7 +739,7 @@ (define_insn "one_cmpl2_neon"
>[(set_attr "type" "neon_move")]
>  )
>  
> -(define_insn "abs2"
> +(define_insn "neon_abs2"
>[(set (match_operand:VDQW 0 "s_register_operand" "=w")
>   (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
>"TARGET_NEON"
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index 0778db1bf4f..ed1bc293b78 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -538,7 +538,6 @@ (define_c_enum "unspec" [
>VRNDAQ_F
>VREV64Q_F
>VDUPQ_N_F
> -  VABSQ_F
>VREV32Q_F
>VCVTTQ_F32_F16
>VCVTBQ_F32_F16
> @@ -562,7 +561,6 @@ (define_c_enum "unspec" [
>VCLSQ_S
>VADDVQ_S
>VADDVQ_U
> -  VABSQ_S
>VREV32Q_U
>VREV32Q_S
>VMOVLTQ_U
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 8e35151da46..80b273229f5 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -208,9 +208,9 @@ (define_expand "one_cmpl2"
>"ARM_HAVE__ARITH && !TARGET_REALLY_IWMMXT"
>  )
>  
> -(define_expand "neg2"
> +(define_expand "2"
>[(set (match_operand:VDQWH 0 "s_register_operand" "")
> - (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
> + (ABSNEG:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
>"ARM_HAVE__ARITH && !TARGET_REALLY_IWMMXT"
>  )
>  
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c 
> b/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c
> new file mode 100644
> index 000..64cd1c2eb4a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
> +
> +#include 
> +#include 
> +
> +#define ABS(a) ((a < 0) ? -a : a)
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, NAME) \
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * 
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +int i;   \
> +for (i=0; i +  dest[i] = ABS(a[i]);   \
> +}
> \
> +}
> +
> +#define FUNC_FLOAT(SIGN, TYPE, BITS, NB, NAME)   
> \
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE * __restrict__ 
> dest, TYPE *a) { \
> +int i;   \
> +for (i=0; i +  dest[i] = ABS(a[i]); 

Re: [PATCH] xtensa: Fix 2 warnings during xtensa build [PR100841]

2021-06-02 Thread Jeff Law via Gcc-patches




On 6/2/2021 11:09 AM, Jakub Jelinek wrote:

Hi!

When building gcc targeting xtensa-linux, there are 2 warnings the PR
complains about:
../../gcc/dwarf2cfi.c: In function ‘void init_one_dwarf_reg_size(int, 
machine_mode, rtx, machine_mode, init_one_dwarf_reg_state*)’:
../../gcc/dwarf2cfi.c:291:12: warning: comparison of integer expressions of 
different signedness: ‘const unsigned int’ and ‘int’ [-Wsign-compare]
   291 |   if (rnum >= DWARF_FRAME_REGISTERS)
../../gcc/function.c: In function ‘void gen_call_used_regs_seq(rtx_insn*, 
unsigned int)’:
../../gcc/function.c:5897:63: warning: comparison of unsigned expression in ‘< 
0’ is always false [-Wtype-limits]
  5897 |   if (crtl->uses_only_leaf_regs && LEAF_REG_REMAP (regno) < 0)
which might during bootstrap or when configured with --enable-werror-always
be turned into errors.

The first one is the -Wsign-compare warning, in c-family we do:
2281  /* Do not warn if the signed quantity is an unsuffixed integer
2282 literal (or some static constant expression involving such
2283 literals or a conditional expression involving such literals)
2284 and it is non-negative.  */
2285  if (tree_expr_nonnegative_warnv_p (sop, &ovf))
2286/* OK */;
and so don't warn if that function determines the expression is
non-negative.  But xtensa defines DWARF_FRAME_REGISTERS as
(16 + (something ? 0 : 1)) and that isn't handled by
tree_expr_nonnegative_warnv_p, VRP can handle it of course, but that is much
later.
The second chunk rewrites it into a form that tree_expr_nonnegative_warnv_p
can handle, in particular (something ? 16 : 16 + 1), where for COND_EXPRs
that function checks both the 2nd and 3rd operand of the ternary operator
and if both are nonnegative, returns true.

The other warning has been introduced fairly recently; LEAF_REG_REMAP is
currently used by 2 targets only, and is documented to yield -1 if a hard
reg number can't be remapped and the remapped register number otherwise.
That means that the type of the expression should be signed (otherwise -1
could never appear), and on SPARC indeed it is defined as
extern char leaf_reg_remap[];
#define LEAF_REG_REMAP(REGNO) (leaf_reg_remap[REGNO])
so unless the host is -funsigned-char by default it works fine.
I guess sparc.[ch] should be fixed to use signed char for leaf_reg_remap,
Eric?
The argument to LEAF_REG_REMAP is often unsigned int though, hard
register numbers are usually not negative, and thus the warning.
I think xtensa doesn't have 2G hard registers and so it is ok to just cast
it to int.

Verified just by making sure the warnings go away in a cross, ok for trunk?

2021-06-02  Jakub Jelinek  

PR target/100841
* config/xtensa/xtensa.h (LEAF_REG_REMAP): Cast REGNO to int to avoid
-Wtype-limits warnings.
(DWARF_FRAME_REGISTERS): Rewrite into ternary operator with addition
in operands to avoid -Wsign-compare warnings.

OK.
jeff


Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Jeff Law via Gcc-patches




On 6/2/2021 11:32 AM, Richard Sandiford wrote:

Richard Biener  writes:

On Wed, Jun 2, 2021 at 12:01 PM Kewen.Lin  wrote:

on 2021/6/2 下午5:13, Richard Sandiford wrote:

"Kewen.Lin"  writes:

Hi Richard,

on 2021/6/2 下午4:11, Richard Sandiford wrote:

Kewen Lin  writes:

Hi all,

define_insn_and_split should avoid to use empty split condition
if the condition for define_insn isn't empty, otherwise it can
sometimes result in unexpected consequence, since the split
will always be done even if the insn condition doesn't hold.

To avoid forgetting to add "&& 1" onto split condition, as
Segher suggested in thread[1], this series is to add the check
and raise an error if it catches the unexpected cases.  With
this new check, we have to fix up some existing
define_insn_and_split which are detected as error.  I hope all
these places are not intentional to be kept as blank.

I wonder whether we should instead redefine the semantics of
define_insn_and_split so that the split condition is always applied
on top of the insn condition.  It's rare for a define_insn_and_split
to have independent insn and split conditions, so at the moment,
we're making the common case hard.


Just want to confirm that the suggestion is just applied for empty
split condition or all split conditions in define_insn_and_split?
I guess it's the former?

No, I meant the latter.  E.g. in:

(define_insn_and_split
   […]
   "TARGET_FOO"
   "…"
   […]
   "reload_completed"
   […]
)

the "reload_completed" condition is almost always a typo for
"&& reload_completed".

Like I say, it rarely makes sense for the split condition to
ignore the insn condition and specify an entirely independent condition.
There might be some define_insn_and_splits that do that, but it'd often
be less confusing to write the insn and split separately if so.

Even if we do want to support independent insn and split conditions,
that's always going to be the rare and surprising case, so it's the one
that should need extra syntax.


Thanks for the clarification!

Since it may impact all ports, I wonder if there is a way to find out
this kind of "rare and surprising" case without a big coverage testing?
I'm happy to make a draft patch for it, but not sure how to early catch
those cases which need to be rewritten for those ports that I can't test
on (even with cfarm machines, the coverage seems still limited).

So what Richard suggests would be to disallow split conditions
that do not start with "&& ", it's probably easy to do that as well
and look for build fails.  That should catch all cases to look at.

Yeah.  As a strawman proposal, how about:

- add a new "define_independent_insn_and_split" that has the
   current semantics of define_insn_and_split.  This should be
   mechanical.

- find the define_insn_and_splits that are missing the "&&", and where
   missing the "&&" might make a difference.  Change them to
   define_independent_insn_and_splits.

   Like Richard says, this can be done by temporarily disallowing
   define_insn_and_splits that have no "&&".

   I think this should remain a mechanical step.  If port maintainers
   think that the missing "&&" is a mistake, they should fix it as
   a separate patch.

- flip the default for define_insn_and_split so that the "&&" is implicit
   (but can still be specified redundantly)

Then port maintainers who don't mind the churn can remove the
redundant "&&"s from the remaining define_insn_and_splits.
That works for me.  If we'd had this in place earlier I wouldn't have 
mucked up the H8 port.

jeff


Re: [PATCH] arm: Auto-vectorization for MVE and Neon: vhadd/vrhadd

2021-06-02 Thread Richard Sandiford via Gcc-patches
Christophe Lyon  writes:
> This patch adds support for auto-vectorization of average value
> computation using vhadd or vrhadd, for both MVE and Neon.
>
> The patch adds the needed [u]avg<mode>3_[floor|ceil] patterns to
> vec-common.md, I'm not sure how to factorize them without introducing
> an unspec iterator?

Yeah, an int iterator would be one way, but I'm not sure it would
make things better given the differences in how Neon and MVE handle
their unspecs.

> It also adds tests for 'floor' and for 'ceil', each for MVE and Neon.
>
> Vectorization works with 8-bit and 16 bit input/output vectors, but
> not with 32-bit ones because the vectorizer expects wider types
> availability for the intermediate values, but int32_t + int32_t does
> not involve wider types in the IR.

Right.  Like you say, it's only valid to use V(R)HADD if, in the source
code, the addition and shift have a wider precision than the operands.
That happens naturally for 8-bit and 16-bit operands, since C arithmetic
promotes them to "int" first.  But for 32-bit operands, the C code needs
to do the addition and shift in 64 bits.  Doing them in 64 bits should
be fine for narrower operands too.

So:

> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c 
> b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c
> new file mode 100644
> index 000..40489ecc67d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include 
> +
> +#define FUNC(SIGN, TYPE, BITS, OP, NAME) \
> +  void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ 
> dest, \
> +   TYPE##BITS##_t *a, TYPE##BITS##_t *b) 
> { \
> +int i;   \
> +for (i=0; i < (128 / BITS); i++) {   
> \
> +  dest[i] = (a[i] OP b[i]) >> 1; \
> +}
> \
> +}
> +

…it should work if you make this "((int64_t) a[i] OP b[i]) >> 1".

> As noted in neon-vhadd-1.c, I couldn't write a test able to use Neon
> vectorization with 64-bit vectors: we default to
> -mvectorize-with-neon-quad, and attempts to use
> -mvectorize-with-neon-double resulted in much worse code, which this
> patch does not aim at improving.

I guess this is because the MVE_2 mode iterators only include 128-bit types.
Leaving Neon double as future work sounds good though.

And yeah, the code for V(R)HADD-equivalent operations is much worse when
V(R)HADD isn't available, since the compiler really does need to double
the precision of the operands, do double-precision addition,
do double-precision shifts, and then truncate back.  So this looks
like the expected behaviour.

Thanks,
Richard


Re: [PATCH] ARC: gcc driver default to hs38_linux

2021-06-02 Thread Vineet Gupta via Gcc-patches
On 6/2/21 1:38 AM, Claudiu Zissulescu wrote:
> Approved.

Thx for the super quick action on this Claudiu. Can this be slated for 
backports too as it causes issues when building toolchains for modern 
cores without explicit defaults.

-Vineet

>
> //Claudiu
> 
> *From:* Vineet Gupta 
> *Sent:* Tuesday, June 1, 2021 10:42 PM
> *To:* gcc-patches@gcc.gnu.org 
> *Cc:* Claudiu Zissulescu ; 
> linux-snps-...@lists.infradead.org 
> ; Vineet Gupta 
> *Subject:* [PATCH] ARC: gcc driver default to hs38_linux
> arc700 is legacy and there's no active development for it, so switch to
> latest hs38_linux as default
>
> Signed-off-by: Vineet Gupta 
> ---
>  gcc/config/arc/arc.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index bd1fe0abd7af..252241a858c9 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -34,7 +34,7 @@ along with GCC; see the file COPYING3. If not see
>  #define SYMBOL_FLAG_CMEM    (SYMBOL_FLAG_MACH_DEP << 3)
>
>  #ifndef TARGET_CPU_DEFAULT
> -#define TARGET_CPU_DEFAULT PROCESSOR_arc700
> +#define TARGET_CPU_DEFAULT PROCESSOR_hs38_linux
>  #endif
>
>  /* Check if this symbol has a long_call attribute in its declaration */
> -- 
> 2.25.1
>



Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Wed, Jun 2, 2021 at 12:01 PM Kewen.Lin  wrote:
>>
>> on 2021/6/2 下午5:13, Richard Sandiford wrote:
>> > "Kewen.Lin"  writes:
>> >> Hi Richard,
>> >>
>> >> on 2021/6/2 下午4:11, Richard Sandiford wrote:
>> >>> Kewen Lin  writes:
>>  Hi all,
>> 
>>  define_insn_and_split should avoid to use empty split condition
>>  if the condition for define_insn isn't empty, otherwise it can
>>  sometimes result in unexpected consequence, since the split
>>  will always be done even if the insn condition doesn't hold.
>> 
>>  To avoid forgetting to add "&& 1" onto split condition, as
>>  Segher suggested in thread[1], this series is to add the check
>>  and raise an error if it catches the unexpected cases.  With
>>  this new check, we have to fix up some existing
>>  define_insn_and_split which are detected as error.  I hope all
>>  these places are not intentional to be kept as blank.
>> >>>
>> >>> I wonder whether we should instead redefine the semantics of
>> >>> define_insn_and_split so that the split condition is always applied
>> >>> on top of the insn condition.  It's rare for a define_insn_and_split
>> >>> to have independent insn and split conditions, so at the moment,
>> >>> we're making the common case hard.
>> >>>
>> >>
>> >> Just want to confirm that the suggestion is just applied for empty
>> >> split condition or all split conditions in define_insn_and_split?
>> >> I guess it's the former?
>> >
>> > No, I meant the latter.  E.g. in:
>> >
>> > (define_insn_and_split
>> >   […]
>> >   "TARGET_FOO"
>> >   "…"
>> >   […]
>> >   "reload_completed"
>> >   […]
>> > )
>> >
>> > the "reload_completed" condition is almost always a typo for
>> > "&& reload_completed".
>> >
>> > Like I say, it rarely makes sense for the split condition to
>> > ignore the insn condition and specify an entirely independent condition.
>> > There might be some define_insn_and_splits that do that, but it'd often
>> > be less confusing to write the insn and split separately if so.
>> >
>> > Even if we do want to support independent insn and split conditions,
>> > that's always going to be the rare and surprising case, so it's the one
>> > that should need extra syntax.
>> >
>>
>> Thanks for the clarification!
>>
>> Since it may impact all ports, I wonder if there is a way to find out
>> this kind of "rare and surprising" case without a big coverage testing?
>> I'm happy to make a draft patch for it, but not sure how to early catch
>> those cases which need to be rewritten for those ports that I can't test
>> on (even with cfarm machines, the coverage seems still limited).
>
> So what Richard suggests would be to disallow split conditions
> that do not start with "&& ", it's probably easy to do that as well
> and look for build fails.  That should catch all cases to look at.

Yeah.  As a strawman proposal, how about:

- add a new "define_independent_insn_and_split" that has the
  current semantics of define_insn_and_split.  This should be
  mechanical.

- find the define_insn_and_splits that are missing the "&&", and where
  missing the "&&" might make a difference.  Change them to
  define_independent_insn_and_splits.

  Like Richard says, this can be done by temporarily disallowing
  define_insn_and_splits that have no "&&".

  I think this should remain a mechanical step.  If port maintainers
  think that the missing "&&" is a mistake, they should fix it as
  a separate patch.

- flip the default for define_insn_and_split so that the "&&" is implicit
  (but can still be specified redundantly)

Then port maintainers who don't mind the churn can remove the
redundant "&&"s from the remaining define_insn_and_splits.

Thanks,
Richard


Re: GCC documentation: porting to Sphinx

2021-06-02 Thread Joseph Myers
On Mon, 31 May 2021, Martin Liška wrote:

> https://splichal.eu/scripts/sphinx/

Looking at some examples there:

https://splichal.eu/scripts/sphinx/gcc/_build/html/c-implementation-defined-behavior/preprocessing-directives.html
 
has some conversion problems:

* "See Implementation-defined behavior, for details of these aspects of 
implementation-defined behavior." is missing the link to the relevant 
section of the cpp manual that's present in the Texinfo source.

* "` character before the :samp:`" is a misconversion (whether from 
Texinfo to RST or from RST to HTML) of the Texinfo source

  @samp{\} character before the @samp{\}

which will need to be fixed.

* The corresponding PDF has the same issues as above (so probably they are 
issues with the conversion to RST, not with Sphinx itself).  In addition, 
the PDF manual ought to be using fixed-width fonts for literal code, 
command-line options, etc., just like the HTML manual, and the 
Texinfo-generated PDF manual, are.

https://splichal.eu/scripts/sphinx/gcc/_build/html/gcc-command-options/passing-options-to-the-assembler.html
 
shows headings such as "-Wa,option, -Wa".  The ", -Wa" doesn't make sense, 
this option is just "-Wa,option".

https://splichal.eu/scripts/sphinx/gcc/_build/html/gcov-a-test-coverage-program.html
 
has a hyphen between "gcov" and "a Test Coverage Program" in the heading.  
It should be an em dash, as in Texinfo.

https://splichal.eu/scripts/sphinx/gcc/_build/html/language-standards-supported-by-gcc/c%2B%2B-language.html
 
has doubled slashes in various URLs where the Texinfo source has /@/ 
(Texinfo @/ means "allow line break", it should not be translated to /).

https://splichal.eu/scripts/sphinx/gcc/_build/html/gcc-command-options/machine-dependent-options/aarch64-options.html
 
shows different formatting for the headings for "-mlow-precision-div, 
-mno-low-precision-div" and "-mtrack-speculation -mno-track-speculation".  
The formatting should be identical.  The only difference in the Texinfo 
source seems to be that the latter is missing @opindex directives.  And 
while it's a bug in the Texinfo source that those directives are missing, 
the presence or absence of index entries should not affect the formatting 
of the documentation for those options.

On that same page, the output for -march=name is broken, containing a 
literal :samp:{feature} (in general, checking for any places where RST 
directives such as :samp: appear in the HTML output might be a good idea 
to look for broken conversions).  The Texinfo source here has

@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}

(where the use of @r{...} is to put the {}[]* characters in a 
variable-width font, since they are not literally part of the option, 
while the other characters that are literally part of the option should be 
in a fixed-width font).

https://splichal.eu/scripts/sphinx/gcc/_build/html/language-standards-supported-by-gcc/references-for-other-languages.html
 
has literal unconverted "@c man" and "@include" and other Texinfo 
directives.  Searching for such things in the HTML output (or the RST 
sources) is a good idea, just like searching for literal RST directives in 
the HTML output, to find other such conversion bugs.

https://splichal.eu/scripts/sphinx/gcc/_build/html/gcc-command-options.html 
says "See option-index", another case with a link that didn't get 
converted properly.  It also has raw :samp: uses indicating a 
misconversion.

I'm not sure how you're determining languages for code-block, but 
https://splichal.eu/scripts/sphinx/gcc/_build/html/gcc-command-options/options-to-control-diagnostic-messages-formatting.html
 
certainly shows some cases where they have been misidentified (e.g. random 
C++ keywords highlighted in the default GCC_COLORS, some JSON being 
highlighted as such but other JSON not).

> - a shared content is factored out ([4])
> - conditional build is fully supported (even for shared parts)
> - manual pages look reasonably well
> - folders are created for files which have >= 5 TOC tree entries
> - various formatting issues were resolved
> - baseconf.py reads BASE-VER, DEV-PHASE, .. files

Could you give more detailed descriptions of how each of the various 
issues I listed in 2015 are addressed here?

https://gcc.gnu.org/legacy-ml/gcc-patches/2015-11/msg01139.html

> I've got couple of questions:
> 
> 1) Do we have to use the following cover text?
>Copyright (c) 1988-2020 Free Software Foundation, Inc.
> 
>Permission is granted to copy, distribute and/or modify this document
> under the terms of the GNU Free Documentation License, Version 1.3 or any
> later version published by the Free Software Foundation; with the Invariant
> Sections being "GNU General Public
>License" and "Funding Free Software", the Front-Cover texts being (a)
> (see below), and with the Back-Cover Texts being (b) (see below).  A copy of
> the license is included in the gfdl(7) man page.
> 
>(a) 

Re: [PATCH] predcom: Enabled by loop vect at O2 [PR100794]

2021-06-02 Thread Richard Sandiford via Gcc-patches
"Kewen.Lin via Gcc-patches"  writes:
> Hi,
>
> As PR100794 shows, in the current implementation PRE bypasses
> some optimization to avoid introducing loop carried dependence
> which stops loop vectorizer to vectorize the loop.  At -O2,
> there is no downstream pass to re-catch this kind of opportunity
> if loop vectorizer fails to vectorize that loop.
>
> This patch follows Richi's suggestion in the PR: if the predcom flag
> isn't explicitly set, loop vectorization will implicitly enable predcom
> without any unrolling.  The Power9 SPEC2017 evaluation showed it
> can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap
> cost model, no remarkable impact at cheap cost model, the build
> time and size impact is fine (see the PR for the details).
>
> By the way, I tested another proposal to guard PRE not skip the
> optimization for cheap and very-cheap vect cost models, the
> evaluation results showed it's fine with very cheap cost model,
> but it can degrade some bmks like 521.wrf_r -9.17% and
> 549.fotonik3d_r -2.07% etc.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
> gcc/ChangeLog:
>
>   PR tree-optimization/100794
>   * tree-predcom.c (tree_predictive_commoning_loop): Add parameter
>   allow_unroll_p and only allow unrolling when it's true.
>   (tree_predictive_commoning): Add parameter allow_unroll_p and
>   adjust for it.
>   (run_tree_predictive_commoning): Likewise.
>   (class pass_predcom): Add private member allow_unroll_p.
>   (pass_predcom::pass_predcom): Init allow_unroll_p.
>   (pass_predcom::gate): Check flag_tree_loop_vectorize and 
>   global_options_set.x_flag_predictive_commoning.
>   (pass_predcom::execute): Adjust for allow_unroll_p.
>
> gcc/testsuite/ChangeLog:
>
>   PR tree-optimization/100794
>   * gcc.dg/tree-ssa/pr100794.c: New test.
>
>  gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +
>  gcc/tree-predcom.c   | 57 +---
>  2 files changed, 60 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
> new file mode 100644
> index 000..6f707ae7fba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details 
> -fdisable-tree-vect" } */
> +
> +extern double arr[100];
> +extern double foo (double, double);
> +extern double sum;
> +
> +void
> +test (int i_0, int i_n)
> +{
> +  int i;
> +  for (i = i_0; i < i_n - 1; i++)
> +{
> +  double a = arr[i];
> +  double b = arr[i + 1];
> +  sum += a * b;
> +}
> +}
> +
> +/* { dg-final { scan-tree-dump "Executing predictive commoning without 
> unrolling" "pcom" } } */
> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
> index 02f911a08bb..65a93c8e505 100644
> --- a/gcc/tree-predcom.c
> +++ b/gcc/tree-predcom.c
> @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec 
> chains)
> applied to this loop.  */
>  
>  static unsigned
> -tree_predictive_commoning_loop (class loop *loop)
> +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
>  {
>vec datarefs;
>vec dependences;
>struct component *components;
>vec chains = vNULL;
> -  unsigned unroll_factor;
> +  unsigned unroll_factor = 0;
>class tree_niter_desc desc;
>bool unroll = false, loop_closed_ssa = false;
>  
> @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop)
>dump_chains (dump_file, chains);
>  }
>  
> -  /* Determine the unroll factor, and if the loop should be unrolled, ensure
> - that its number of iterations is divisible by the factor.  */
> -  unroll_factor = determine_unroll_factor (chains);
> -  unroll = (unroll_factor > 1
> - && can_unroll_loop_p (loop, unroll_factor, &desc));
> +  if (allow_unroll_p)
> +/* Determine the unroll factor, and if the loop should be unrolled, 
> ensure
> +   that its number of iterations is divisible by the factor.  */
> +unroll_factor = determine_unroll_factor (chains);
> +
> +  if (unroll_factor > 1)
> +unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
>  
>/* Execute the predictive commoning transformations, and possibly unroll 
> the
>   loop.  */
> @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop)
>  /* Runs predictive commoning.  */
>  
>  unsigned
> -tree_predictive_commoning (void)
> +tree_predictive_commoning (bool allow_unroll_p)
>  {
>class loop *loop;
>unsigned ret = 0, changed = 0;
> @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void)
>FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
>  if (optimize_loop_for_speed_p (loop))
>{
> - changed |= tree_predictive_commoning_loop (loop);
> + changed 

Re: [PATCH 08/11] mips: Update unexpected empty split condition

2021-06-02 Thread Jeff Law via Gcc-patches




On 6/1/2021 11:04 PM, Kewen Lin via Gcc-patches wrote:

gcc/ChangeLog:

* config/mips/mips.md (, bswapsi2, bswapdi2): Fix empty
split condition.
The mips, or1k and sparc changes are fine.  They're all preserving 
existing behavior.



jeff



Re: [PATCH 05/11] h8300: Update unexpected empty split condition

2021-06-02 Thread Jeff Law via Gcc-patches




On 6/1/2021 11:04 PM, Kewen Lin wrote:

gcc/ChangeLog:

* config/h8300/combiner.md (*andsi3_lshiftrt_n_sb): Fix empty split
condition.
Hold off on this.  We may need a stronger condition in there and that's 
something I'm in the process of cleaning up in the H8 port.





jeff



[PATCH] xtensa: Fix 2 warnings during xtensa build [PR100841]

2021-06-02 Thread Jakub Jelinek via Gcc-patches
Hi!

When building gcc targeting xtensa-linux, there are 2 warnings the PR
complains about:
../../gcc/dwarf2cfi.c: In function ‘void init_one_dwarf_reg_size(int, 
machine_mode, rtx, machine_mode, init_one_dwarf_reg_state*)’:
../../gcc/dwarf2cfi.c:291:12: warning: comparison of integer expressions of 
different signedness: ‘const unsigned int’ and ‘int’ [-Wsign-compare]
  291 |   if (rnum >= DWARF_FRAME_REGISTERS)
../../gcc/function.c: In function ‘void gen_call_used_regs_seq(rtx_insn*, 
unsigned int)’:
../../gcc/function.c:5897:63: warning: comparison of unsigned expression in ‘< 
0’ is always false [-Wtype-limits]
 5897 |   if (crtl->uses_only_leaf_regs && LEAF_REG_REMAP (regno) < 0)
which might during bootstrap or when configured with --enable-werror-always
be turned into errors.

The first one is the -Wsign-compare warning, in c-family we do:
2281  /* Do not warn if the signed quantity is an unsuffixed integer
2282 literal (or some static constant expression involving such
2283 literals or a conditional expression involving such literals)
2284 and it is non-negative.  */
2285  if (tree_expr_nonnegative_warnv_p (sop, &ovf))
2286/* OK */;
and so don't warn if that function determines the expression is
non-negative.  But xtensa defines DWARF_FRAME_REGISTERS as
(16 + (something ? 0 : 1)) and that isn't handled by
tree_expr_nonnegative_warnv_p, VRP can handle it of course, but that is much
later.
The second chunk rewrites it into a form that tree_expr_nonnegative_warnv_p
can handle, in particular (something ? 16 : 16 + 1), where for COND_EXPRs
that function checks both the 2nd and 3rd operand of the ternary operator
and if both are nonnegative, returns true.

The other warning has been introduced fairly recently; LEAF_REG_REMAP is
currently used by 2 targets only, and is documented to yield -1 if a hard
reg number can't be remapped and the remapped register number otherwise.
That means that the type of the expression should be signed (otherwise -1
could never appear), and on SPARC indeed it is defined as
extern char leaf_reg_remap[];
#define LEAF_REG_REMAP(REGNO) (leaf_reg_remap[REGNO])
so unless the host is -funsigned-char by default it works fine.
I guess sparc.[ch] should be fixed to use signed char for leaf_reg_remap,
Eric?
The argument to LEAF_REG_REMAP is often unsigned int though, hard
register numbers are usually not negative, and thus the warning.
I think xtensa doesn't have 2G hard registers and so it is ok to just cast
it to int.

Verified just by making sure the warnings go away in a cross, ok for trunk?

2021-06-02  Jakub Jelinek  

PR target/100841
* config/xtensa/xtensa.h (LEAF_REG_REMAP): Cast REGNO to int to avoid
-Wtype-limits warnings.
(DWARF_FRAME_REGISTERS): Rewrite into ternary operator with addition
in operands to avoid -Wsign-compare warnings.

--- gcc/config/xtensa/xtensa.h.jj   2021-01-04 10:25:45.570157539 +0100
+++ gcc/config/xtensa/xtensa.h  2021-06-02 17:41:04.190861829 +0200
@@ -279,7 +279,7 @@ extern const char xtensa_leaf_regs[FIRST
 
 /* For Xtensa, no remapping is necessary, but this macro must be
defined if LEAF_REGISTERS is defined.  */
-#define LEAF_REG_REMAP(REGNO) (REGNO)
+#define LEAF_REG_REMAP(REGNO) ((int) (REGNO))
 
 /* This must be declared if LEAF_REGISTERS is set.  */
 extern int leaf_function;
@@ -775,8 +775,9 @@ typedef struct xtensa_args
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 0)
 #define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (0)
 #define DWARF_ALT_FRAME_RETURN_COLUMN 16
-#define DWARF_FRAME_REGISTERS (DWARF_ALT_FRAME_RETURN_COLUMN   \
-  + (TARGET_WINDOWED_ABI ? 0 : 1))
+#define DWARF_FRAME_REGISTERS (TARGET_WINDOWED_ABI \
+  ? DWARF_ALT_FRAME_RETURN_COLUMN  \
+  : DWARF_ALT_FRAME_RETURN_COLUMN + 1)
 #define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 2 : INVALID_REGNUM)
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
   (flag_pic\

Jakub



Re: [PATCH 07/11] m68k: Update unexpected empty split condition

2021-06-02 Thread Jeff Law via Gcc-patches




On 6/1/2021 11:04 PM, Kewen Lin wrote:

gcc/ChangeLog:

* config/m68k/m68k.md (*zero_extend_inc, *zero_extend_dec,
*zero_extendsidi2): Fix empty split condition.

OK.  Thanks.
jeff



Re: [PATCH] rtl: constm64_rtx..const64_rtx

2021-06-02 Thread Richard Sandiford via Gcc-patches
Segher Boessenkool  writes:
> Since times immemorial there has been const_int_rtx for all values from
> -64 to 64, but only constm1_rtx..const2_rtx have been available for
> convenient use.  Change this, so that we can use all values in
> {-64,...,64} in RTL easily.  This matters, because then we can just say
>   if (XEXP (x, 1) == const16_rtx)
> and things like that, since all const_int in that range are unique.  We
> already do for -1, 0, 1, 2, but we could for everything.
>
> 2021-06-01  Segher Boessenkool  
>   * rtl.h (constm64_rtx, ..., constm2_rtx): New.
>   (const3_rtx, ..., const64_rtx): New.
>
> doc/
>   * rtl.texi (Constants): Document the new names.
> ---
> Tested on powerpc64-linux {-m32,-m64}, but this of course doesn't mean
> all that much until any of the new names are actually used.
>
> Is this okay for trunk?

No strong objection, but personally I'd rather not add something
that is very specific to VOIDmode CONST_INTs.  I realise it's very
unlikely that we'll ever be able to give CONST_INTs their proper mode
(no-one has the kind of time needed to do that), but I don't think we
should make the switch actively harder either.

How about adding a new inline helper function that tests whether an
rtx is a CONST_INT with a given value?  Then we could have a
__builtin_constant_p shortcut for the [-64, 64] case.  That would
also avoid hard-coding the range.

Thanks,
Richard

>
>
> Segher
>
>
>  gcc/doc/rtl.texi |  20 +
>  gcc/rtl.h| 127 
> ++-
>  2 files changed, 137 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
> index 5af71137a878..5dbfb6028095 100644
> --- a/gcc/doc/rtl.texi
> +++ b/gcc/doc/rtl.texi
> @@ -1658,19 +1658,21 @@ copies of the top bit.  Note however that values are 
> neither
>  inherently signed nor inherently unsigned; where necessary, signedness
>  is determined by the rtl operation instead.
>  
> +@findex constm64_rtx
> +@findex constm1_rtx
>  @findex const0_rtx
>  @findex const1_rtx
>  @findex const2_rtx
> -@findex constm1_rtx
> -There is only one expression object for the integer value zero; it is
> +@findex const64_rtx
> +There is only one expression object for the integer value zero: it is
>  the value of the variable @code{const0_rtx}.  Likewise, the only
> -expression for integer value one is found in @code{const1_rtx}, the only
> -expression for integer value two is found in @code{const2_rtx}, and the
> -only expression for integer value negative one is found in
> -@code{constm1_rtx}.  Any attempt to create an expression of code
> -@code{const_int} and value zero, one, two or negative one will return
> -@code{const0_rtx}, @code{const1_rtx}, @code{const2_rtx} or
> -@code{constm1_rtx} as appropriate.
> +expression for integer value one is found in @code{const1_rtx}, and more
> +generally, the only expression for integer value @var{N} is found in
> +@code{const@var{N}_rtx}, and the only expression for integer value negative
> +@var{N} is found in @code{constm@var{N}_rtx}, both for any @var{N} from 1 up
> +to 64 inclusive.  Any attempt to create an expression of code
> +@code{const_int} and value in that range will return @code{const0_rtx},
> +@code{const1_rtx}, and so on, as appropriate.
>  
>  @findex const_true_rtx
>  Similarly, there is only one object for the integer whose value is
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index 35178b5bfaca..5429b7a3f4ac 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -3768,10 +3768,135 @@ extern unsigned int split_all_insns_noflow (void);
>  #define MAX_SAVED_CONST_INT 64
>  extern GTY(()) rtx const_int_rtx[MAX_SAVED_CONST_INT * 2 + 1];
>  
> +#define constm64_rtx (const_int_rtx[MAX_SAVED_CONST_INT-64])
> +#define constm63_rtx (const_int_rtx[MAX_SAVED_CONST_INT-63])
> +#define constm62_rtx (const_int_rtx[MAX_SAVED_CONST_INT-62])
> +#define constm61_rtx (const_int_rtx[MAX_SAVED_CONST_INT-61])
> +#define constm60_rtx (const_int_rtx[MAX_SAVED_CONST_INT-60])
> +#define constm59_rtx (const_int_rtx[MAX_SAVED_CONST_INT-59])
> +#define constm58_rtx (const_int_rtx[MAX_SAVED_CONST_INT-58])
> +#define constm57_rtx (const_int_rtx[MAX_SAVED_CONST_INT-57])
> +#define constm56_rtx (const_int_rtx[MAX_SAVED_CONST_INT-56])
> +#define constm55_rtx (const_int_rtx[MAX_SAVED_CONST_INT-55])
> +#define constm54_rtx (const_int_rtx[MAX_SAVED_CONST_INT-54])
> +#define constm53_rtx (const_int_rtx[MAX_SAVED_CONST_INT-53])
> +#define constm52_rtx (const_int_rtx[MAX_SAVED_CONST_INT-52])
> +#define constm51_rtx (const_int_rtx[MAX_SAVED_CONST_INT-51])
> +#define constm50_rtx (const_int_rtx[MAX_SAVED_CONST_INT-50])
> +#define constm49_rtx (const_int_rtx[MAX_SAVED_CONST_INT-49])
> +#define constm48_rtx (const_int_rtx[MAX_SAVED_CONST_INT-48])
> +#define constm47_rtx (const_int_rtx[MAX_SAVED_CONST_INT-47])
> +#define constm46_rtx (const_int_rtx[MAX_SAVED_CONST_INT-46])
> +#define constm45_rtx (const_int_rtx[MAX_SAVED_CONST_INT-45])
> +#define constm44_rtx 

Re: [PATCH] AArch64: Improve address rematerialization costs

2021-06-02 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> No.  It's never correct to completely wipe out the existing cost - you 
> don't know the context where this is being used.
> 
> The most you can do is not add any additional cost.

Remember that aarch64_rtx_costs starts like this:

  /* By default, assume that everything has equivalent cost to the
 cheapest instruction.  Any additional costs are applied as a delta
 above this default.  */
  *cost = COSTS_N_INSNS (1);

This is literally the last statement executed before the big switch...
Given the cost is always initialized, there is no existing cost besides this
default value, and thus changing it to something else is not an issue.
We could of course do something like:

*cost -= COSTS_N_INSNS (1);

But that is less clear and problematic if the default value ever changes.

Cheers,
Wilco

Re: GCC documentation: porting to Sphinx

2021-06-02 Thread Joseph Myers
On Wed, 2 Jun 2021, Joel Sherrill wrote:

> For RTEMS, we switched from texinfo to Sphinx and the dependency
> on Python3 for Sphinx has caused a bit of hassle. Is this going to be
> an issue for GCC?

What Sphinx (and, thus, Python) versions does the GCC manual build work 
with?  Can it work with e.g. any Sphinx versions from the past five years, 
or are there newer Sphinx features that are critical for the GCC manuals?  
I've seen a need for frequent Sphinx updates being a pain when building 
other software using Sphinx for its manual.

> Also we rely on TexLive for PDF output and that's a bit of a pain to
> install. Tex was incorrectly packaged on some RHEL/CentOS versions.

This is nothing new, since building PDF manuals from Texinfo sources also 
needs TeX.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] MAINTAINERS: create DCO section; add myself to it

2021-06-02 Thread Koning, Paul via Gcc-patches



> On Jun 2, 2021, at 11:03 AM, Jason Merrill via Gcc-patches 
>  wrote:
> 
> On 6/1/21 3:22 PM, Richard Biener via Gcc wrote:
>> On June 1, 2021 7:30:54 PM GMT+02:00, David Malcolm via Gcc 
>>  wrote:
 ...
>>> 
>>> The MAINTAINERS file doesn't seem to have such a "DCO list"
>>> yet; does the following patch look like what you had in mind?
>>> 
>>> ChangeLog
>>> 
>>> * MAINTAINERS: Create DCO section; add myself to it.
>>> ---
>>> MAINTAINERS | 12 
>>> 1 file changed, 12 insertions(+)
>>> 
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index db25583b37b..1148e0915cf 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -685,3 +685,15 @@ Josef Zlomek   
>>> 
>>> James Dennett   
>>> Christian Ehrhardt  
>>> Dara Hazeghi
>>> +
>>> +
>>> +DCO
>>> +===
>>> +
>>> +Developers with commit access may add their name to the following list
>>> +to certify the DCO (https://developercertificate.org/) for all
>> There should be a verbatim copy of the DCO in this file or the repository.
> 
> It's on the website now, at gcc.gnu.org/dco.html , and I've added the section 
> to MAINTAINERS.  It's not clear to me that it needs to be in the source tree 
> as well, since it's project contribution policy rather than license.

I'm wondering about change control of this document.  The GPL has a version 
number and references to use the version number.  The DCO seems to have a 
version number, but the DCO section in the MAINTAINERS file does not give it.  
I would think that a certification should call out which DCO it uses, whether 
in a one-off (in a patch) or in the MAINTAINERS DCO list.

paul



Re: [wwwdocs] lists: Fix thinko

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 10:25:34AM +0200, Andreas Schwab wrote:
> On Jun 01 2021, Segher Boessenkool wrote:
> > -* ^List-Id: .*<.*@gcc.gnu.org>$
> > +* ^List-Id: .*<.*.gcc.gnu.org>$
> 
> Shouldn't the < and > be mangled as &lt; and &gt;?

"It works fine for me!"

You are right of course.


Segher


Re: [wwwdocs] lists: Fix thinko

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 09:17:20AM +0200, Gerald Pfeifer wrote:
> On Tue, 1 Jun 2021, Segher Boessenkool wrote:
> > Brown paper bag time.  The List-Id: should look like a hostname, not
> > like an email address.  Somehow I put in an at-sign when changing my
> > gcc-patches example to the match-all example we have here.
> 
> That's how things looked like before when using the Sender: header,
> for example. So totally understandable.

And the existing example (like most procmail recipes) uses unescaped dot
where a literal dot would perhaps be better.

> > -* ^List-Id: .*<.*@gcc.gnu.org>$
> > +* ^List-Id: .*<.*.gcc.gnu.org>$
> 
> In my own filters I use
> 
>  ^List-Id: .*gcc.gnu.org
> 
> to make it simpler and increase robustness around "<" and ">" (and "$").

And decrease robustness elsewhere (it now will match any list id that
has the string gccXgnuYorg anywhere in it, where X and Y can be any
character).

> Or   ^List-Id: .*gcc(-announce|-patches|-cvs-wwwdocs)?.gcc.gnu.org  in
> one case.
> 
> What do you think?

I only filter "gcc", "gcc-patches", "gcc-bugs" myself, all to separate
folders.

I use  .*<...>$  on all lists I have everywhere, it works fine for me.
It already isn't super strict, and more lax might work fine as well
(there is absolutely nothing that prevents anyone from sending specially
crafted emails to wreak havoc on everyone's filters anyway).

If you change the example, maybe make it less peculiar at the same time?
Filtering a high-traffic list like gcc-patches to the same folder as
gcc-announce is a strange thing to do.

(And please do make a change, so I don't have to brown-paper-bag a
second time, this time for , sigh).


Segher


Re: [PATCH] inline-asm: Fix ICE with bitfields in "m" operands [PR100785]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 11:25 AM, Jakub Jelinek wrote:

On Wed, Jun 02, 2021 at 11:09:45AM -0400, Jason Merrill wrote:

On 6/2/21 3:59 AM, Jakub Jelinek wrote:

  if (!allows_reg && !cxx_mark_addressable (*op))
operand = error_mark_node;
+ else if (!allows_reg && bitfield_p (*op))
+   {
+ error_at (loc, "attempt to take address of bit-field");


Hmm, why aren't we already catching this in cxx_mark_addressable?


That is certainly possible, but it goes against Eric's patch
https://gcc.gnu.org/pipermail/gcc-patches/2015-June/421483.html


Hmm, I wonder what his rationale was?


So, do you want to keep the
   if (bitfield_p (arg))
 {
   if (complain & tf_error)
 error_at (loc, "attempt to take address of bit-field");
   return error_mark_node;
 }
in cp_build_addr_expr_1 and duplicate such check in cxx_mark_addressable
(though, that one doesn't have complain, will it be ok to do it
unconditionally for SFINAE)?


We would need to add complain; the existing diagnostics can't happen in 
SFINAE context, but this one can.


Alternately, cxx_mark_addressable could abort on a bitfield to require 
the caller to handle it, but that seems less useful.


Jason


Shall the C FE do the same (i.e. diagnose in both places)?




Re: GCC documentation: porting to Sphinx

2021-06-02 Thread Joel Sherrill
For RTEMS, we switched from texinfo to Sphinx and the dependency
on Python3 for Sphinx has caused a bit of hassle. Is this going to be
an issue for GCC?

Also we rely on TexLive for PDF output and that's a bit of a pain to
install. Tex was incorrectly packaged on some RHEL/CentOS versions.

This ignores a couple of plugins we use that I don't expect GCC to use.

It works great but the host dependencies are sometimes a pain. We've
ended up writing host OS specific advice/howto's to address this. Any
expectations on host pain versus the pretty painless texinfo?

Thanks.

--joel
RTEMS

On Wed, Jun 2, 2021 at 2:37 AM Martin Liška  wrote:

> On 6/1/21 3:31 PM, Michael Matz wrote:
> > Hello,
> >
> > On Tue, 1 Jun 2021, Martin Liška wrote:
> >
> >> On 5/31/21 5:49 PM, Michael Matz wrote:
> >>> Hello Martin,
> >>>
> >>> On Mon, 31 May 2021, Martin Liška wrote:
> >>>
>  I've made quite some progress with the porting of the documentation
> and
>  I would like to present it to the community now:
>  https://splichal.eu/scripts/sphinx/
> Note the documentation is automatically ([1]) generated from
> texinfo with
>  a
>  GitHub workflow ([2]).
> >>>
> >>> One other thing I was recently thinking about, in the Sphinx vs. texinfo
> >>> discussion: locally available documentation browsable/searchable in
> >>> terminal with info(1) (or equivalents).
> >>
> >> Yes, that's handy.
> >>
> >>> I think the above (i.e. generating .rst from the texinfo file) would
> >>> immediately nullify all my worries.  So, just to be extra sure: your
> >>> proposal now is to generate the .rst files, and that .texinfo remains
> >>> the maintained sources, right?
> >>
> >> No, .texinfo files will be gone. However, Sphinx can output to info
> >> format:
> >>
> https://www.sphinx-doc.org/en/master/man/sphinx-build.html#cmdoption-sphinx-build-M
> >
> > I see, that's good to hear.
> >
> >> And I've just added the generated Info pages here:
> >> https://splichal.eu/scripts/sphinx/
> >
> > Okay, but there's something amiss, just compare a local gcc.info with
> > that.  The sphinx generated one seems to only contain command line
> > options, but none of the other topics, in particular it seems to contain
> > the "Invoking GCC" chapter (and only that) as top-level, and all other
> > ones are missing (like "C implementation", "C++ implementation", "C
> > extension", and so on).
>
> You are right, I reduced that to 'Invoking GCC', which is simply what 'man
> gcc'
> presents. However, I moved that back to the entire GCC manual what you can
> see now in the info page.
>
> >
> > Looking at gccint.info I also see quite some confusion, it's unclear to
> > me if content is missing or not.  But e.g. the top-level structure has a
> > different order (a less logical one, this one is btw. shared with the
> > order of the HTML generated docu, so it's probably specific to sphinx
> > setup or such).
>
> Yes, the organization was bad and I fixed that. Now it's much better.
>
> Martin
>
> >
> > Ignoring that missing content what is there right now does seem somewhat
> > acceptable for local use, though.
> >
> >
> > Ciao,
> > Michael.
> >
>
>


Re: [PATCH] define auto_vec copy ctor and assignment (PR 90904)

2021-06-02 Thread Martin Sebor via Gcc-patches

On 6/2/21 12:55 AM, Richard Biener wrote:

On Tue, Jun 1, 2021 at 9:56 PM Martin Sebor  wrote:


On 5/27/21 2:53 PM, Jason Merrill wrote:

On 4/27/21 11:52 AM, Martin Sebor via Gcc-patches wrote:

On 4/27/21 8:04 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 3:59 PM Martin Sebor  wrote:


On 4/27/21 1:58 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 2:46 AM Martin Sebor via Gcc-patches
 wrote:


PR 90904 notes that auto_vec is unsafe to copy and assign because
the class manages its own memory but doesn't define (or delete)
either special function.  Since I first ran into the problem,
auto_vec has grown a move ctor and move assignment from
a dynamically-allocated vec but still no copy ctor or copy
assignment operator.

The attached patch adds the two special functions to auto_vec along
with a few simple tests.  It makes auto_vec safe to use in containers
that expect copyable and assignable element types and passes
bootstrap
and regression testing on x86_64-linux.


The question is whether we want such uses to appear since those
can be quite inefficient?  Thus the option is to delete those
operators?


I would strongly prefer the generic vector class to have the properties
expected of any other generic container: copyable and assignable.  If
we also want another vector type with this restriction I suggest to add
another "noncopyable" type and make that property explicit in its name.
I can submit one in a followup patch if you think we need one.


I'm not sure (and not strictly against the copy and assign).  Looking
around
I see that vec<> does not do deep copying.  Making auto_vec<> do it
might be surprising (I added the move capability to match how vec<>
is used - as "reference" to a vector)


The vec base classes are special: they have no ctors at all (because
of their use in unions).  That's something we might have to live with
but it's not a model to follow in ordinary containers.


I don't think we have to live with it anymore, now that we're writing
C++11.


The auto_vec class was introduced to fill the need for a conventional
sequence container with a ctor and dtor.  The missing copy ctor and
assignment operators were an oversight, not a deliberate feature.
This change fixes that oversight.

The revised patch also adds a copy ctor/assignment to the auto_vec
primary template (that's also missing it).  In addition, it adds
a new class called auto_vec_ncopy that disables copying and
assignment as you prefer.


Hmm, adding another class doesn't really help with the confusion richi
mentions.  And many uses of auto_vec will pass them as vec, which will
still do a shallow copy.  I think it's probably better to disable the
copy special members for auto_vec until we fix vec<>.


There are at least a couple of problems that get in the way of fixing
all of vec to act like a well-behaved C++ container:

1) The embedded vec has a trailing "flexible" array member with its
instances having different size.  They're initialized by memset and
copied by memcpy.  The class can't have copy ctors or assignments
but it should disable/delete them instead.

2) The heap-based vec is used throughout GCC with the assumption of
shallow copy semantics (not just as function arguments but also as
members of other such POD classes).  This can be changed by providing
copy and move ctors and assignment operators for it, and also for
some of the classes in which it's a member and that are used with
the same assumption.

3) The heap-based vec::block_remove() assumes its elements are PODs.
That breaks in VEC_ORDERED_REMOVE_IF (used in gcc/dwarf2cfi.c:2862
and tree-vect-patterns.c).

I took a stab at both and while (1) is easy, (2) is shaping up to
be a big and tricky project.  Tricky because it involves using
std::move in places where what's moved is subsequently still used.
I can keep plugging away at it but it won't change the fact that
the embedded and heap-based vecs have different requirements.


So you figured that neither vec<> nor auto_vec<> are a container like
std::vector.


That's obvious from glancing at their definitions.  I didn't go
through the exercise to figure that out.



I'm not sure it makes sense to try to make it so since obviously vec<>
was designed to match the actual needs of GCC.  auto_vec<> was added
to make a RAII (like auto_bitmap, etc.) wrapper, plus it got the ability
to provide initial stack storage.


The goal was to see if the two vec instances could be made safer
to use by taking advantage of C++ 11 features.  As I mentioned
recently, creating a copy of a vec and modifying it changes it as
well as the original (e.g., by changing a vec argument passed to
it by value, a function changes the actual argument in the caller).
That's surprising to most C++ programmers.

My conclusion from the exercise is that although some of the problems
with vec can, and IMO should, be solved, making the heap-based one
a well-behaved C++ 11 container will take considerable effort and
is impossible for the 

Re: [PATCH] AArch64: Improve address rematerialization costs

2021-06-02 Thread Richard Earnshaw via Gcc-patches




On 02/06/2021 11:21, Wilco Dijkstra via Gcc-patches wrote:

Hi,

Given the large improvements from better register allocation of GOT accesses,
I decided to generalize it to get large gains for normal addressing too:

Improve rematerialization costs of addresses.  The current costs are set too 
high
which results in extra register pressure and spilling.  Using lower costs means
addresses will be rematerialized more often rather than being spilled or causing
spills.  This results in significant codesize reductions and performance gains.
SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 0.12%
smaller.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-01  Wilco Dijkstra  

 * config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
rematerialization
 costs for HIGH, LO_SUM and SYMBOL_REF.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
641c83b479e76cbcc75b299eb7ae5f634d9db7cd..08245827daa3f8199b29031e754244c078f0f500
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13444,45 +13444,22 @@ cost_plus:
  return false;  /* All arguments need to be in registers.  */
}
  
-case SYMBOL_REF:

+/* The following costs are used for rematerialization of addresses.
+   Set a low cost for all global accesses - this ensures they are
+   preferred for rematerialization, blocks them from being spilled
+   and reduces register pressure.  The result is significant codesize
+   reductions and performance gains. */
  
-  if (aarch64_cmodel == AARCH64_CMODEL_LARGE

- || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
-   {
- /* LDR.  */
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-   {
- /* ADRP, followed by ADD.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += 2 * extra_cost->alu.arith;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-   {
- /* ADR.  */
- if (speed)
-   *cost += extra_cost->alu.arith;
-   }
-
-  if (flag_pic)
-   {
- /* One extra load instruction, after accessing the GOT.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
+case SYMBOL_REF:
+  *cost = 0;
return true;


No.  It's never correct to completely wipe out the existing cost - you 
don't know the context where this is being used.


The most you can do is not add any additional cost.

Similarly for all the other cases.

  
  case HIGH:

+  *cost = 0;
+  return true;
+
  case LO_SUM:
-  /* ADRP/ADD (immediate).  */
-  if (speed)
-   *cost += extra_cost->alu.arith;
+  *cost = COSTS_N_INSNS (3) / 4;
return true;
  
  case ZERO_EXTRACT:




Re: [GCC][Patch] arm: Fix the mve multilib for the broken cmse support (pr99939).

2021-06-02 Thread Richard Earnshaw via Gcc-patches




On 01/06/2021 18:16, Srinath Parvathaneni via Gcc-patches wrote:

Hi Richard,


-Original Message-
From: Richard Earnshaw 
Sent: 13 April 2021 14:55
To: Srinath Parvathaneni ; gcc-
patc...@gcc.gnu.org
Cc: Richard Earnshaw 
Subject: Re: [GCC][Patch] arm: Fix the mve multilib for the broken cmse
support (pr99939).



On 12/04/2021 14:04, Srinath Parvathaneni via Gcc-patches wrote:

Hi,

The current CMSE support in the multilib build for "-march=armv8.1-

m.main+mve -mfloat-abi=hard -mfpu=auto"

is broken as specified in PR99939 and this patch fixes the issue.

Regression tested on arm-none-eabi and found no regressions.

Ok for master? and Ok for GCC-10 branch?

Regards,
Srinath.

gcc/testsuite/ChangeLog:

2021-04-12  Srinath Parvathaneni  

PR target/99939
* gcc.target/arm/cmse/cmse-20.c: New test.

libgcc/ChangeLog:

2021-04-12  Srinath Parvathaneni  

PR target/99939
* config/arm/t-arm: Make changes to use cmse.c for all the
armv8.1-m.main multilibs.



### Attachment also inlined for ease of reply

###



diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
new file mode 100644
index


..7e2739e14792624adf5b428
0ca58

a5d8320acbf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-additional-options "-mcmse
+-Wl,--section-start,.gnu.sgstubs=0x0019" } */
+
+#include 
+#include 
+#include 
+
+void __attribute__((cmse_nonsecure_entry))
+secure_fun (int a, int *p)
+{
+  void *b = cmse_check_address_range ((void *)p, a, 1);
+
+  if (b == NULL)
+   __builtin_abort ();
+  printf("%d", *((int *)b));
+}
+
+int
+main (void)
+{
+  int *ptr;
+  int size = 1;
+  ptr = (int *) calloc (1, sizeof(int *));
+  *ptr = 1315852292;
+  secure_fun (size, ptr);
+  free (ptr);
+  return 0;
+}
diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm index


3625a2590beec4e4e0e0881be9ad284c595c7190..949e2ee06653680211ff2dcf
0b55

a41a6aedc31c 100644
--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -9,11 +9,12 @@ CMSE_OPTS:=-mcmse
   endif

   ifdef HAVE_CMSE
-ifndef HAVE_V81M
-libgcc-objects += cmse.o cmse_nonsecure_call.o
+libgcc-objects += cmse.o

   cmse.o: $(srcdir)/config/arm/cmse.c
$(gcc_compile) -c $(CMSE_OPTS) $<
+ifndef HAVE_V81M
+libgcc-objects += cmse_nonsecure_call.o
   cmse_nonsecure_call.o: $(srcdir)/config/arm/cmse_nonsecure_call.S
   $(gcc_compile) -c $<
   endif



So if I have two object files using CMSE and one is built with v8m, but the
other with v8.1m, when I link them, the needed additional support for the
v8m object file will be missing the library support.

Wouldn't it be better to just build the cmse_nonsecure_call code
unconditionally?  It won't be called if it's not needed, but will be there if
something does require it.


I have modified the patch to build the cmse_nonsecure_call code unconditionally,
I have attached the diff and cover letter in this email.

Please let me know if it is ok for master?

Regards,
Srinath.


R.




gcc/testsuite/ChangeLog:

2021-06-01  Srinath Parvathaneni  

* gcc.target/arm/cmse/cmse-18.c: Modify

Incomplete.  I know you've modified it, but how?

* gcc.target/arm/cmse/cmse-20.c: New test.

libgcc/ChangeLog:

2021-06-01  Srinath Parvathaneni  

* config/arm/cmse_nonsecure_call.S: Modify to add
__ARM_FEATURE_MVE macro check.

"Add __ARM_FEATURE_MVE macro check." is sufficient (I know you've 
modified it).


* config/arm/t-arm: Make changes to link cmse.o and
cmse_nonsecure_call.o on finding -mcmse gcc options.

Again, "Make changes to " is redundant.

diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c 
b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c

new file mode 100644
index 
..7e2739e14792624adf5b4280ca58a5d8320acbf0

--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-additional-options "-mcmse 
-Wl,--section-start,.gnu.sgstubs=0x0019" } */


Why does this need a different stubs address to all the other executable 
CMSE tests?  Can't it use 0x0040 like them?



R.


Re: [PATCH] inline-asm: Fix ICE with bitfields in "m" operands [PR100785]

2021-06-02 Thread Jakub Jelinek via Gcc-patches
On Wed, Jun 02, 2021 at 11:09:45AM -0400, Jason Merrill wrote:
> On 6/2/21 3:59 AM, Jakub Jelinek wrote:
> >   if (!allows_reg && !cxx_mark_addressable (*op))
> > operand = error_mark_node;
> > + else if (!allows_reg && bitfield_p (*op))
> > +   {
> > + error_at (loc, "attempt to take address of bit-field");
> 
> Hmm, why aren't we already catching this in cxx_mark_addressable?

That is certainly possible, but it goes against Eric's patch
https://gcc.gnu.org/pipermail/gcc-patches/2015-June/421483.html

So, do you want to keep the
  if (bitfield_p (arg))
{
  if (complain & tf_error)
error_at (loc, "attempt to take address of bit-field");
  return error_mark_node;
}
in cp_build_addr_expr_1 and duplicate such check in cxx_mark_addressable
(though, that one doesn't have complain, will it be ok to do it
unconditionally for SFINAE)?

Shall the C FE do the same (i.e. diagnose in both places)?

Jakub



Re: [PATCH] inline-asm: Fix ICE with bitfields in "m" operands [PR100785]

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/2/21 3:59 AM, Jakub Jelinek wrote:

  if (!allows_reg && !cxx_mark_addressable (*op))
operand = error_mark_node;
+ else if (!allows_reg && bitfield_p (*op))
+   {
+ error_at (loc, "attempt to take address of bit-field");


Hmm, why aren't we already catching this in cxx_mark_addressable?

Jason



Re: [PATCH] MAINTAINERS: create DCO section; add myself to it

2021-06-02 Thread Jason Merrill via Gcc-patches

On 6/1/21 3:22 PM, Richard Biener via Gcc wrote:

On June 1, 2021 7:30:54 PM GMT+02:00, David Malcolm via Gcc  
wrote:

On Tue, 2021-06-01 at 10:00 -0400, David Edelsohn via Gcc wrote:

GCC was created as part of the GNU Project but has grown to operate
as an autonomous project.

The GCC Steering Committee has decided to relax the requirement to
assign copyright for all changes to the Free Software Foundation.
GCC
will continue to be developed, distributed, and licensed under the
GNU
General Public License v3.0. GCC will now accept contributions with
or
without an FSF copyright assignment. This change is consistent with
the practices of many other major Free Software projects, such as the
Linux kernel.

Contributors who have an FSF Copyright Assignment don't need to
change anything.  Contributors who wish to utilize the Developer
Certificate
of Origin[1] should add a Signed-off-by message to their commit
messages.
Developers with commit access may add their name to the DCO list in
the
MAINTAINERS file to certify the DCO for all future commits in lieu of
individual
Signed-off-by messages for each commit.

The GCC Steering Committee continues to affirm the principles of Free
Software, and that will never change.

- The GCC Steering Committee

[1] https://developercertificate.org/



The MAINTAINERS file doesn't seem to have such a "DCO list"
yet; does the following patch look like what you had in mind?

ChangeLog

* MAINTAINERS: Create DCO section; add myself to it.
---
MAINTAINERS | 12 
1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index db25583b37b..1148e0915cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -685,3 +685,15 @@ Josef Zlomek   

James Dennett   
Christian Ehrhardt  
Dara Hazeghi
+
+
+DCO
+===
+
+Developers with commit access may add their name to the following list
+to certify the DCO (https://developercertificate.org/) for all


There should be a verbatim copy of the DCO in this file or the repository.


It's on the website now, at gcc.gnu.org/dco.html , and I've added the 
section to MAINTAINERS.  It's not clear to me that it needs to be in the 
source tree as well, since it's project contribution policy rather than 
license.


Jason



Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-02 Thread Pat Haugen via Gcc-patches
On 6/2/21 9:19 AM, Segher Boessenkool wrote:
> On Wed, Jun 02, 2021 at 08:23:48AM -0500, Pat Haugen wrote:
>> On 6/2/21 7:01 AM, Richard Biener wrote:
>>> So did you check the RTL (and alias-sets) produced by
>>> __builtin_return_address?  Test coverage might
>>> be low here and w/o scheduling opportunities to break things.
>>
>> __builtin_return_address creates its own copy of the link reg to a pseudo 
>> upon function entry. It doesn't appear to try and "reuse" any LR copy/save 
>> location that might be generated via the prolog code. References to 
>> __builtin_return_address will then refer to that pseudo. So I don't see any 
>> connection between the prolog save code and __builtin_return_address.
> 
> That is for __builtin_return_address(0), the simple (and always working)
> one.  It is harder for non-zero arguments (although I don't see why
> those would not work, even with inlining).


Right, I realized after I sent the reply I was being a little too specific to 
__builtin_return_address(0). For non-zero args __builtin_return_address 
generates code to walk the appropriate number of stack frames back before 
loading the LR from the designated save area. All those mem references are 
using either frame-alias-set or 0 as their alias set, so we still should be 
fine. So regardless of the argument to the builtin, there is no connection 
between the current function's prologue LR save code and the code 
__builtin_return_address() would generate in the function.

-Pat


Re: [RFC/PATCH] updating global ranges and their effect on __builtin_unreachable code

2021-06-02 Thread Aldy Hernandez via Gcc-patches




On 6/2/21 1:52 PM, Richard Biener wrote:

On Wed, Jun 2, 2021 at 12:34 PM Aldy Hernandez via Gcc-patches
 wrote:



But the whole point of all this singing and dancing is not to make
warnings but to be able to implement assert (); or assume (); that
will result in no code but optimization based on the assumption.

That means that all the checks guarding __builtin_unreachable ()
should be removed at the GIMPLE level - just not too early
to preserve range info on the variables participating in the
guarding condition.

So yes, it sounds fragile but instead it's carefully architected.  Heh.

In particular it is designed so that early optimization leaves those
unreachable () around (for later LTO consumption and inlining, etc.
to be able to re-create the ranges) whilst VRP1 / DOM will end up
eliminating them.  I think we have testcases that verify said behavior,
namely optimize out range checks based on the assertions - maybe missed


Understood.

I will note that my proposed patch does not remove any unreachables, and 
maintains current behavior.  It just refines the ranges from the ranger 
with current global ranges.  So I think the patch should go in, 
regardless of what is decided with __builtin_unreachables downthread.



the case where this only happens after inlining (important for your friendly
C++ abstraction hell), and the unreachable()s gone.


I have pointed this out before, and will repeat it in case you missed it:

"Richard, you have made it very clear that we disagree on core design 
issues, but that's no reason to continually make snide comments on every 
patch or PR.  Can we keep the discussions focused on the technical bits?"


https://gcc.gnu.org/pipermail/gcc-patches/2021-April/569072.html



Re: [wwwdocs] lists: Correct procmail recipe

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 10:44:34AM +0200, Gerald Pfeifer wrote:
> On Tue, 1 Jun 2021, Segher Boessenkool wrote:
> > We haven't had Sender: for a while now. 
> 
> "a while now" was about four(?) hours when you sent that yesterday. :-)

Ah, I thought it was since we moved to the new mailing list software.
I have used List-Id: in my procmailrcs since much longer, so I did not
notice the recent change (but irc did :-) )


Segher


Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-02 Thread Segher Boessenkool
On Wed, Jun 02, 2021 at 08:23:48AM -0500, Pat Haugen wrote:
> On 6/2/21 7:01 AM, Richard Biener wrote:
> > So did you check the RTL (and alias-sets) produced by
> > __builtin_return_address?  Test coverage might
> > be low here and w/o scheduling opportunities to break things.
> 
> __builtin_return_address creates its own copy of the link reg to a pseudo 
> upon function entry. It doesn't appear to try and "reuse" any LR copy/save 
> location that might be generated via the prolog code. References to 
> __builtin_return_address will then refer to that pseudo. So I don't see any 
> connection between the prolog save code and __builtin_return_address.

That is for __builtin_return_address(0), the simple (and always working)
one.  It is harder for non-zero arguments (although I don't see why
those would not work, even with inlining).


Segher


Re: [GCC][PATCH] arm: Fix multilib mapping for CDE extensions.

2021-06-02 Thread Richard Earnshaw via Gcc-patches




On 01/06/2021 18:08, Srinath Parvathaneni via Gcc-patches wrote:

Hi All,

On passing +cdecp[0-7] extension to the -march string in command line options,
multilib linking is failing as mentioned in PR100856. This patch fixes this
issue by generating a separate -march string only for multilib comparison.

Regression tested on arm-none-eabi and found no regressions.

Ok for master?


Not as it stands.  More comments below.


Regards,
Srinath.

gcc/ChangeLog:

2021-06-01  Srinath Parvathaneni  

PR target/100856
* common/config/arm/arm-common.c (arm_canon_arch_option): Modify
function to generate canonical march string after removing cde related
compiler extensions.
(arm_canon_arch_multilib_option): Define function.
* config/arm/arm-cpus.in (CDE_LIST): Define fgroup.
* config/arm/arm.h (arm_canon_arch_multilib_option): Define macro.
(CANON_ARCH_MULTILIB_SPEC_FUNCTION): Define macro.
(ARCH_CANONICAL_MULTILIB_SPECS): Define macro.
(TARGET_MULTLILIB_ARCH): Define macro.
* gcc.c (used_arg_t::operator ()): Add condition to generate separate
march string for multilib matching.

gcc/testsuite/ChangeLog:

2021-06-01  Srinath Parvathaneni  

PR target/100856
* gcc.target/arm/acle/pr100856.c: New test.
* gcc.target/arm/multilib.exp: Modify.



### Attachment also inlined for ease of reply###


diff --git a/gcc/common/config/arm/arm-common.c 
b/gcc/common/config/arm/arm-common.c
index 
9980af6885c3dfe68f61fa0f39b23022b4e59c19..7d8c6e5253f3f1683eed99f479a09186a46c2d22
 100644
--- a/gcc/common/config/arm/arm-common.c
+++ b/gcc/common/config/arm/arm-common.c
@@ -616,6 +616,8 @@ public:
  }
  };
  
+static int multilib_arch = 0;


Please, no!  Instead...


+
  /* Generate a canonical representation of the -march option from the
 current -march string (if given) and other options on the command
 line that might affect the architecture.  This aids multilib selection
@@ -703,6 +705,14 @@ arm_canon_arch_option (int argc, const char **argv)


This function should be renamed arm_canon_arch_option_1 and given an 
extra (bool) parameter to control the behaviour.  Then 
arm_canon_arch_option can call it with the parameter false to maintain 
the existing behaviour and arm_canon_arch_multilib_option can pass true.



arm_initialize_isa (target_isa, selected_arch->common.isa_bits);
arm_parse_option_features (target_isa, &selected_arch->common,
 strchr (arch, '+'));
+  if (multilib_arch == 1)
+   {
+ const enum isa_feature cde_bitlist[] = {ISA_ALL_CDE, isa_nobit};


This is too specific, but that's mostly a naming problem.  Instead we 
need a new feature group IGNORE_FOR_MULTILIB (which at present just 
contains ALL_CDE).



+ sbitmap isa_cdebits = sbitmap_alloc (isa_num_bits);
+ arm_initialize_isa (isa_cdebits, cde_bitlist);
+ bitmap_and_compl (target_isa, target_isa, isa_cdebits);
+   }
+
if (fpu && strcmp (fpu, "auto") != 0)
{
  /* We assume that architectures do not have any FPU bits
@@ -786,18 +796,27 @@ arm_canon_arch_option (int argc, const char **argv)
  
arm_initialize_isa (base_isa, selected_arch->common.isa_bits);
  
-  /* Architecture has no extension options, so just return the canonical

- architecture name.  */
-  if (selected_arch->common.extensions == NULL)
-return selected_arch->common.name;
-
/* We're only interested in extension bits.  */
bitmap_and_compl (target_isa, target_isa, base_isa);
  
+  /* Architecture has no extension options, so just return the canonical

+ architecture name.  */
+  if (multilib_arch == 0 && selected_arch->common.extensions == NULL)
+return selected_arch->common.name;
/* There are no extensions needed.  Just return the canonical architecture
   name.  */
-  if (bitmap_empty_p (target_isa))
+  else if (multilib_arch == 0 && bitmap_empty_p (target_isa))
  return selected_arch->common.name;
+  else if (multilib_arch == 1
+  && (selected_arch->common.extensions == NULL
+  || bitmap_empty_p (target_isa)))
+{
+  canonical_arch = (char *) xmalloc (strlen (selected_arch->common.name)
++ strlen ("march="));
+  strcpy (canonical_arch, "march=");


Prepending "march=" only for the multilib_arch variant confuses the 
purpose of this routine, which is to generate a suitable arch string.  I 
think this should be pushed up, perhaps as far as the specs strings 
themselves.  Then most of this hunk just disappears as completely redundant.



+  strcat (canonical_arch, selected_arch->common.name);
+  return canonical_arch;
+}
  
/* What is left is the architecture that the compiler will target.  We

   now need to map that back into a suitable option+features list.
@@ -899,10 +918,20 @@ 

Re: [PATCH] avr: Add atmega324pb MCU

2021-06-02 Thread Matwey V. Kornilov via Gcc-patches
Ping?

вс, 25 окт. 2020 г. в 16:09, Matwey V. Kornilov :
>
>
> Ping?
>
> чт, 4 июн. 2020 г. в 18:30, Matwey V. Kornilov :
>>
>> Reference: https://www.microchip.com/wwwproducts/en/ATMEGA324PB
>> ---
>>  gcc/config/avr/avr-mcus.def | 1 +
>>  gcc/doc/avr-mmcu.texi   | 2 +-
>>  2 files changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
>> index 24046c367a7..ac80f9e63ff 100644
>> --- a/gcc/config/avr/avr-mcus.def
>> +++ b/gcc/config/avr/avr-mcus.def
>> @@ -207,6 +207,7 @@ AVR_MCU ("atmega323",ARCH_AVR5, AVR_ISA_NONE, 
>> "__AVR_ATmega323__",
>>  AVR_MCU ("atmega324a",   ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324A__", 
>>0x0100, 0x0, 0x8000, 0)
>>  AVR_MCU ("atmega324p",   ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324P__", 
>>0x0100, 0x0, 0x8000, 0)
>>  AVR_MCU ("atmega324pa",  ARCH_AVR5, AVR_ISA_NONE, 
>> "__AVR_ATmega324PA__",   0x0100, 0x0, 0x8000, 0)
>> +AVR_MCU ("atmega324pb",  ARCH_AVR5, AVR_ISA_NONE, 
>> "__AVR_ATmega324PB__",   0x0100, 0x0, 0x8000, 0)
>>  AVR_MCU ("atmega325",ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325__",  
>>0x0100, 0x0, 0x8000, 0)
>>  AVR_MCU ("atmega325a",   ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325A__", 
>>0x0100, 0x0, 0x8000, 0)
>>  AVR_MCU ("atmega325p",   ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325P__", 
>>0x0100, 0x0, 0x8000, 0)
>> diff --git a/gcc/doc/avr-mmcu.texi b/gcc/doc/avr-mmcu.texi
>> index 5cd7b50afa2..87ed4476b1a 100644
>> --- a/gcc/doc/avr-mmcu.texi
>> +++ b/gcc/doc/avr-mmcu.texi
>> @@ -38,7 +38,7 @@
>>
>>  @item avr5
>>  ``Enhanced'' devices with 16@tie{}KiB up to 64@tie{}KiB of program memory.
>> -@*@var{mcu}@tie{}= @code{atmega16}, @code{atmega16a}, @code{atmega16hva}, 
>> @code{atmega16hva2}, @code{atmega16hvb}, @code{atmega16hvbrevb}, 
>> @code{atmega16m1}, @code{atmega16u4}, @code{atmega161}, @code{atmega162}, 
>> @code{atmega163}, @code{atmega164a}, @code{atmega164p}, @code{atmega164pa}, 
>> @code{atmega165}, @code{atmega165a}, @code{atmega165p}, @code{atmega165pa}, 
>> @code{atmega168}, @code{atmega168a}, @code{atmega168p}, @code{atmega168pa}, 
>> @code{atmega168pb}, @code{atmega169}, @code{atmega169a}, @code{atmega169p}, 
>> @code{atmega169pa}, @code{atmega32}, @code{atmega32a}, @code{atmega32c1}, 
>> @code{atmega32hvb}, @code{atmega32hvbrevb}, @code{atmega32m1}, 
>> @code{atmega32u4}, @code{atmega32u6}, @code{atmega323}, @code{atmega324a}, 
>> @code{atmega324p}, @code{atmega324pa}, @code{atmega325}, @code{atmega325a}, 
>> @code{atmega325p}, @code{atmega325pa}, @code{atmega328}, @code{atmega328p}, 
>> @code{atmega328pb}, @code{atmega329}, @code{atmega329a}, @code{atmega329p}, 
>> @code{atmega329pa}, @code{atmega3250}, @code{atmega3250a}, 
>> @code{atmega3250p}, @code{atmega3250pa}, @code{atmega3290}, 
>> @code{atmega3290a}, @code{atmega3290p}, @code{atmega3290pa}, 
>> @code{atmega406}, @code{atmega64}, @code{atmega64a}, @code{atmega64c1}, 
>> @code{atmega64hve}, @code{atmega64hve2}, @code{atmega64m1}, 
>> @code{atmega64rfr2}, @code{atmega640}, @code{atmega644}, @code{atmega644a}, 
>> @code{atmega644p}, @code{atmega644pa}, @code{atmega644rfr2}, 
>> @code{atmega645}, @code{atmega645a}, @code{atmega645p}, @code{atmega649}, 
>> @code{atmega649a}, @code{atmega649p}, @code{atmega6450}, @code{atmega6450a}, 
>> @code{atmega6450p}, @code{atmega6490}, @code{atmega6490a}, 
>> @code{atmega6490p}, @code{ata5795}, @code{ata5790}, @code{ata5790n}, 
>> @code{ata5791}, @code{ata6613c}, @code{ata6614q}, @code{ata5782}, 
>> @code{ata5831}, @code{ata8210}, @code{ata8510}, @code{ata5702m322}, 
>> @code{at90pwm161}, @code{at90pwm216}, @code{at90pwm316}, @code{at90can32}, 
>> @code{at90can64}, @code{at90scr100}, @code{at90usb646}, @code{at90usb647}, 
>> @code{at94k}, @code{m3000}.
>> +@*@var{mcu}@tie{}= @code{atmega16}, @code{atmega16a}, @code{atmega16hva}, 
>> @code{atmega16hva2}, @code{atmega16hvb}, @code{atmega16hvbrevb}, 
>> @code{atmega16m1}, @code{atmega16u4}, @code{atmega161}, @code{atmega162}, 
>> @code{atmega163}, @code{atmega164a}, @code{atmega164p}, @code{atmega164pa}, 
>> @code{atmega165}, @code{atmega165a}, @code{atmega165p}, @code{atmega165pa}, 
>> @code{atmega168}, @code{atmega168a}, @code{atmega168p}, @code{atmega168pa}, 
>> @code{atmega168pb}, @code{atmega169}, @code{atmega169a}, @code{atmega169p}, 
>> @code{atmega169pa}, @code{atmega32}, @code{atmega32a}, @code{atmega32c1}, 
>> @code{atmega32hvb}, @code{atmega32hvbrevb}, @code{atmega32m1}, 
>> @code{atmega32u4}, @code{atmega32u6}, @code{atmega323}, @code{atmega324a}, 
>> @code{atmega324p}, @code{atmega324pa}, @code{atmega324pb}, @code{atmega325}, 
>> @code{atmega325a}, @code{atmega325p}, @code{atmega325pa}, @code{atmega328}, 
>> @code{atmega328p}, @code{atmega328pb}, @code{atmega329}, @code{atmega329a}, 
>> @code{atmega329p}, @code{atmega329pa}, @code{atmega3250}, 
>> @code{atmega3250a}, @code{atmega3250p}, @code{atmega3250pa}, 
>> @code{atmega3290}, 

Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-02 Thread H.J. Lu via Gcc-patches
On Wed, Jun 2, 2021 at 12:02 AM Richard Biener
 wrote:
>
> On Wed, Jun 2, 2021 at 3:57 AM H.J. Lu via Gcc-patches
>  wrote:
> >
> > On Tue, Jun 1, 2021 at 6:17 PM Hongtao Liu  wrote:
> > >
> > > On Wed, Jun 2, 2021 at 7:07 AM H.J. Lu via Gcc-patches
> > >  wrote:
> > > >
> > > > On Tue, Jun 1, 2021 at 7:21 AM Jeff Law  wrote:
> > > > >
> > > > >
> > > > >
> > > > > On 6/1/2021 7:29 AM, H.J. Lu via Gcc-patches wrote:
> > > > > > On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
> > > > > >  wrote:
> > > > > >> On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
> > > > > >>> On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> > > > > 
> > > > >  On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > > > > > "H.J. Lu via Gcc-patches"  writes:
> > > > > >> On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > > > > >>> On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > > > > >>>  wrote:
> > > > >  On Mon, May 31, 2021 at 3:12 PM H.J. Lu 
> > > > >   wrote:
> > > > > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > > > > >  wrote:
> > > > > >> On Mon, May 31, 2021 at 2:09 PM H.J. Lu 
> > > > > >>  wrote:
> > > > > >>> On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener 
> > > > > >>> wrote:
> > > > > >>>-- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx 
> > > > > >>> DATA, scalar_int_mode
> > > > > >>> MODE)
> > > > > >>>This function returns the RTL of a register 
> > > > > >>> containing
> > > > > >>>'GET_MODE_SIZE (MODE)' consecutive copies of 
> > > > > >>> the unsigned char
> > > > > >>>value given in the RTL register DATA.  For 
> > > > > >>> example, if MODE is 4
> > > > > >>>bytes wide, return the RTL for 0x01010101*DATA.
> > > > > >> For this one I wonder if it should be an optab 
> > > > > >> instead.  Couldn't you
> > > > > >> use the existing vec_duplicate for this by using 
> > > > > >> (paradoxical) subregs
> > > > > >> like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI 
> > > > > >> (reg:QI ...)))?
> > > > > > I tried.   It doesn't even work on x86.  See:
> > > > > >
> > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> > > > >  Not sure what I should read from there...
> > > > > 
> > > > > > There are special cases to subreg HI, SI and DI modes 
> > > > > > of TI mode in
> > > > > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg 
> > > > > > doesn't
> > > > > > work here.   Each backend may need its own special 
> > > > > > handling.
> > > > >  OK, I guess I'm not (RTL) qualified enough to further 
> > > > >  review these parts,
> > > > >  sorry.  Since we're doing code generation the canonical 
> > > > >  way to communicate
> > > > >  with backends should be optabs, not some set of 
> > > > >  disconnected target hooks.
> > > > >  But as said, I probably don't know enough of RTL to see 
> > > > >  why it's the only way.
> > > > > 
> > > > >  Richard.
> > > > > >>> Here is the patch to add optabs instead.  Does it look OK?
> > > > > >>>
> > > > > >>> Thanks.
> > > > > >>>
> > > > > >>> H.J.
> > > > > >>> ---
> > > > > >>> Add 2 optabs:
> > > > > >>>
> > > > > >>> 1. integer_extract: Extract lower bit value from the 
> > > > > >>> integer value in
> > > > > >>> TImode, OImode or XImode.
> > > > > >> That sounds very specific, esp. the restriction to 
> > > > > >> {TI,OI,XI}mode.
> > > > > >> It also sounds like it matches (subreg:{TI,OI,XI} (...) 
> > > > > >> 0).  There are
> > > > > >> existing target hooks verifying subreg validity - why's 
> > > > > >> that not a good
> > > > > >> fit here?  ISTR you say gen_lowpart () doesn't work (or 
> > > > > >> was it
> > > > > >> simplify_gen_subreg?), why's that so?
> > > > > > {TI,OI,XI}mode are storage only integer types.   subreg 
> > > > > > doesn't work
> > > > > > well on them.  I got
> > > > > >
> > > > > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > > > > extern void *ops;
> > > > > >
> > > > > > void
> > > > > > foo (int c)
> > > > > > {
> > > > > > __builtin_memset (ops, c, 34);
> > > > > > }
> > > > > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > > > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > > > > 

RE: [PATCH] ARM: reset arm_fp16_format

2021-06-02 Thread Tamar Christina via Gcc-patches
Hi Martin,

Testsuite isn't very happy with it:

Before:

# of expected passes149743
# of unexpected failures294   
# of unexpected successes   2 
# of expected failures  947   
# of unresolved testcases   56
# of unsupported tests  8248  

After:

# of expected passes148907
# of unexpected failures380
# of unexpected successes   2
# of expected failures  947
# of unresolved testcases   267
# of unsupported tests  8466

Regards,
Tamar

> -Original Message-
> From: Martin Liška 
> Sent: Tuesday, June 1, 2021 3:06 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Christophe Lyon ; Tamar Christina
> ; Kyrylo Tkachov 
> Subject: [PATCH] ARM: reset arm_fp16_format
> 
> Hello.
> 
> The patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98636#c20
> where target option restore can be called and arm_fp16_format should be
> reset to ARM_FP16_FORMAT_NONE.
> 
> It fixes the ICE in the PR.
> 
> Could the Arm folks please test the patch for me on an Arm machine?
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
>   PR target/98636
>   * config/arm/arm.c (arm_option_reconfigure_globals): Reset
>   the option if isa_bit_fp16 is not set.
> ---
>   gcc/config/arm/arm.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index
> 7b37e1b602c..4543f3c6b55 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -3765,6 +3765,8 @@ arm_option_reconfigure_globals (void)
>   error ("selected fp16 options are incompatible");
> arm_fp16_format = ARM_FP16_FORMAT_IEEE;
>   }
> +  else
> +arm_fp16_format = ARM_FP16_FORMAT_NONE;
> 
> arm_arch_cde = 0;
> arm_arch_cde_coproc = 0;
> --
> 2.31.1



Re: [PATCH 2/4] [og11] Unify ARRAY_REF/INDIRECT_REF stripping code in extract_base_bit_offset

2021-06-02 Thread Julian Brown
On Wed, 2 Jun 2021 13:59:05 +0200
Richard Biener  wrote:

> On Wed, Jun 2, 2021 at 12:47 PM Julian Brown
>  wrote:
> >
> > For historical reasons, it seems that extract_base_bit_offset
> > unnecessarily used two different ways to strip
> > ARRAY_REF/INDIRECT_REF nodes from component accesses. I verified
> > that the two ways of performing the operation gave the same results
> > across the whole testsuite (and several additional benchmarks).  
> 
> But the two code paths clearly do something different.  The base_ref case
> allows (*p)[i] while the !base_ref does not because TREE_CODE (base)
> != COMPONENT_REF.
> And the !base_ref case for INDIRECT_REF is quite odd, only allowing
> *(x.p) where x.p is of REFERENCE_TYPE.
> 
> Whatever this code is supposed to do ... maybe the "prologue" should
> be inlined at the two callers instead.

Thanks -- I'll go back and look at that bit again. I found it hard to
figure out the intention behind these differences, since AFAIK the
meaning should be the same -- maybe Jakub could weigh in as the
original author of this bit (I believe)?

(I figured something like "we can take this shortcut because we know we
already did XYZ to this bit" -- this part was originally completely
open-coded, i.e. there were two copies of this code, and it's only not
open-coded now because I tried to factor out the common bits...)

Julian


Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-02 Thread Pat Haugen via Gcc-patches
On 6/2/21 7:01 AM, Richard Biener wrote:
> On Wed, Jun 2, 2021 at 1:15 PM Pat Haugen  wrote:
>>
>> On 6/2/21 1:51 AM, Richard Biener wrote:
>>> On Tue, Jun 1, 2021 at 10:37 PM Pat Haugen via Gcc-patches
>>>  wrote:

 Make sure link reg save MEM has frame alias set, to match other link reg
 save/restore code.

 Bootstrap/regtest on powerpc64/powerpc64le with no new regressions. Ok for
 trunk?

 -Pat


 2021-06-01  Pat Haugen  

 gcc/ChangeLog:

 * config/rs6000/rs6000-logue.c (rs6000_emit_prologue): Use
 gen_frame_store.



 diff --git a/gcc/config/rs6000/rs6000-logue.c 
 b/gcc/config/rs6000/rs6000-logue.c
 index 13c00e740d6..07337c4836a 100644
 --- a/gcc/config/rs6000/rs6000-logue.c
 +++ b/gcc/config/rs6000/rs6000-logue.c
 @@ -3257,7 +3257,7 @@ rs6000_emit_prologue (void)
if (!WORLD_SAVE_P (info) && info->lr_save_p
&& !cfun->machine->lr_is_wrapped_separately)
  {
 -  rtx addr, reg, mem;
 +  rtx reg;

reg = gen_rtx_REG (Pmode, 0);
START_USE (0);
 @@ -3267,13 +3267,8 @@ rs6000_emit_prologue (void)
if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
 | SAVE_NOINLINE_FPRS_SAVES_LR)))
 {
 - addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
 -  GEN_INT (info->lr_save_offset + frame_off));
 - mem = gen_rtx_MEM (Pmode, addr);
 - /* This should not be of rs6000_sr_alias_set, because of
 -__builtin_return_address.  */
>>>
>>> I can't figure what this comment meant - did you?  Note the old code
>>> looks like it would end up with alias-set zero and thus more conservative
>>> than with using frame-alias-set so this is an optimization?
>>
>> No, I couldn't figure out the reasoning for the comment/code either. It's 
>> been that way since it was introduced in March 2000 as part of the “Merge in 
>> changes from newppc-branch.” patch. All other places where we save/restore 
>> the link reg use a MEM with frame-alias-set. This change is an optimization 
>> as you suspect in that it allows us to schedule non-aliased loads above the 
>> link reg store (which couldn't happen before due to use of alias-set zero).
> 
> So did you check the RTL (and alias-sets) produced by
> __builtin_return_address?  Test coverage might
> be low here and w/o scheduling opportunities to break things.

__builtin_return_address creates its own copy of the link reg to a pseudo upon 
function entry. It doesn't appear to try and "reuse" any LR copy/save location 
that might be generated via the prolog code. References to 
__builtin_return_address will then refer to that pseudo. So I don't see any 
connection between the prolog save code and __builtin_return_address.

-Pat


Re: [RFC/PATCH] updating global ranges and their effect on __builtin_unreachable code

2021-06-02 Thread Andrew MacLeod via Gcc-patches

On 6/2/21 7:52 AM, Richard Biener wrote:

On Wed, Jun 2, 2021 at 12:34 PM Aldy Hernandez via Gcc-patches
 wrote:

We've been having "issues" in our branch when exporting to the global
space ranges that take into account previously known ranges
(SSA_NAME_RANGE_INFO, etc).  For the longest time we had the export
feature turned off because it had the potential of removing
__builtin_unreachable code early in the pipeline.  This was causing one
or two tests to fail.

I finally got fed up, and investigated why.

Take the following code:

i_4 = somerandom ();
if (i_4 < 0)
  goto <bb 3>; [INV]
else
  goto <bb 4>; [INV]

<bb 3> :
__builtin_unreachable ();

<bb 4> :

It turns out that both legacy evrp and VRP have code that notices the
above pattern and sets the *global* range for i_4 to [0,MAX].  That is,
the range for i_4 is set, not at BB4, but at the definition site.  See
uses of assert_unreachable_fallthru_edge_p() for details.

This global range causes subsequent passes (VRP1 in the testcase below),
to remove the checks and the __builtin_unreachable code altogether.

// pr80776-1.c
int somerandom (void);
void
Foo (void)
{
int i = somerandom ();
if (! (0 <= i))
  __builtin_unreachable ();
if (! (0 <= i && i <= 99))
  __builtin_unreachable ();
sprintf (number, "%d", i);
}

This means that by the time the -Wformat-overflow warning runs, the
above sprintf has been left unguarded, and a bogus warning is issued.

Currently the above test does not warn, but that's because of an
oversight in export_global_ranges().  This function is disregarding
known global ranges (SSA_NAME_RANGE_INFO and SSA_NAME_PTR_INFO) and only
setting ranges the ranger knows about.

For the above test the IL is:

<bb 2> :
i_4 = somerandom ();
if (i_4 < 0)
  goto <bb 3>; [INV]
else
  goto <bb 4>; [INV]

<bb 3> :
__builtin_unreachable ();

<bb 4> :
i.0_1 = (unsigned int) i_4;
if (i.0_1 > 99)
  goto <bb 5>; [INV]
else
  goto <bb 6>; [INV]

<bb 5> :
__builtin_unreachable ();

<bb 6> :
_7 = __builtin___sprintf_chk (&number, 1, 7, "%d", i_4);


Legacy evrp has determined that the range for i_4 is [0,MAX] per my
analysis above, but ranger has no known range for i_4 at the definition
site.  So at export_global_ranges time, ranger leaves the [0,MAX] alone.

OTOH, evrp sets the global range at the definition for i.0_1 to
[0,99] per the same unreachable feature.  However, ranger has
correctly determined that the range for i.0_1 at the definition is
[0,MAX], which it then proceeds to export.  Since the current
export_global_ranges (mistakenly) does not take into account previous
global ranges, the ranges in the global tables end up like this:

i_4: [0, MAX]
i.0_1: [0, MAX]

This causes the first unreachable block to be removed in VRP1, but the
second one to remain.  Later VRP can determine that i_4 in the sprintf
call is [0,99], and no warning is issued.

But... the missing bogus warning is due to current export_global_ranges
ignoring SSA_NAME_RANGE_INFO and friends, something which I'd like to
fix.  However, fixing this gets us back to:

i_4: [0, MAX]
i.0_1: [0, 99]

Which means, we'll be back to removing the unreachable blocks and
issuing a warning in pr80776-1.c (like we have been since the beginning
of time).

The attached patch fixes export_global_ranges to the expected behavior,
and adds the previous XFAIL to pr80776-1.c, while documenting why this
warning is issued in the first place.

Once legacy evrp is removed, this won't be an issue, as ranges in the IL
will tell the truth.  However, this will mean that we will no longer
remove the first __builtin_unreachable combo.  But ISTM, that would be
correct behavior ??.

BTW, in addition to this patch we could explore removing the
assert_unreachable_fallthru_edge_p() use in the evrp_analyzer, since it
is no longer needed to get the warnings in the testcases in the original
PR correctly (gcc.dg/pr80776-[12].c).

But the whole point of all this singing and dancing is not to make
warnings but to be able to implement assert (); or assume (); that
will result in no code but optimization based on the assumption.

That means that all the checks guarding __builtin_unreachable ()
should be removed at the GIMPLE level - just not too early
to preserve range info on the variables participating in the
guarding condition.

So yes, it sounds fragile but instead it's carefully architected.  Heh.

In particular it is designed so that early optimization leaves those
unreachable () around (for later LTO consumption and inlining, etc.
to be able to re-create the ranges) whilst VRP1 / DOM will end up
eliminating them.  I think we have testcases that verify said behavior,
namely optimize out range checks based on the assertions - maybe missed
the case where this only happens after inlining (important for your friendly
C++ abstraction hell), and the unreachable()s gone.

Please make sure to not break that.


Let me see if I understand...  we want to leave 

Re: [PATCH 04/11] cris: Update unexpected empty split condition

2021-06-02 Thread Hans-Peter Nilsson via Gcc-patches
> From: Kewen Lin 
> Date: Wed, 2 Jun 2021 07:04:54 +0200

> gcc/ChangeLog:
> 
>   * config/cris/cris.md (*addi_reload): Fix empty split condition.
> ---
>  gcc/config/cris/cris.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
> index 7de0ec63fcf..d5a3c703a83 100644
> --- a/gcc/config/cris/cris.md
> +++ b/gcc/config/cris/cris.md
> @@ -1311,7 +1311,7 @@ (define_insn_and_split "*addi_reload"
> && (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)
> && (reload_in_progress || reload_completed)"
>"#"
> -  ""
> +  "&& 1"
>[(set (match_dup 0)
>   (plus:SI (ashift:SI (match_dup 2) (match_dup 3)) (match_dup 1)))]
>"operands[3] = operands[3] == const2_rtx ? const1_rtx : const2_rtx;")
> -- 
> 2.17.1
> 

Ok, thanks, if only for all-round consistency.

In preparation for a warning for an empty condition?  I'm
usually all for .md-warnings, but I'm not sure about the
benefit of that one, though.  Those "&& 1" look...hackish.

brgds, H-P


[committed] libstdc++: Value-initialize objects held by EBO helpers [PR 100863]

2021-06-02 Thread Jonathan Wakely via Gcc-patches
The allocator, hash function and equality function should all be
value-initialized by the default constructor of an unordered container.
Do it in the EBO helper, so we don't have to get it right in multiple
places.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

PR libstdc++/100863
PR libstdc++/65816
* include/bits/hashtable_policy.h (_Hashtable_ebo_helper):
Value-initialize subobject.
* testsuite/23_containers/unordered_map/allocator/default_init.cc:
Remove XFAIL.
* testsuite/23_containers/unordered_set/allocator/default_init.cc:
Remove XFAIL.

Tested powerpc64le-linux. Committed to trunk.

commit f8f0193b5b83f6e85d65015e79c803295baf5166
Author: Jonathan Wakely 
Date:   Wed Jun 2 12:34:48 2021

libstdc++: Value-initialize objects held by EBO helpers [PR 100863]

The allocator, hash function and equality function should all be
value-initialized by the default constructor of an unordered container.
Do it in the EBO helper, so we don't have to get it right in multiple
places.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

PR libstdc++/100863
PR libstdc++/65816
* include/bits/hashtable_policy.h (_Hashtable_ebo_helper):
Value-initialize subobject.
* testsuite/23_containers/unordered_map/allocator/default_init.cc:
Remove XFAIL.
* testsuite/23_containers/unordered_set/allocator/default_init.cc:
Remove XFAIL.

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h 
b/libstdc++-v3/include/bits/hashtable_policy.h
index 1090a398e1e..2130c958262 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1162,7 +1162,7 @@ namespace __detail
 struct _Hashtable_ebo_helper<_Nm, _Tp, true>
 : private _Tp
 {
-  _Hashtable_ebo_helper() = default;
+  _Hashtable_ebo_helper() noexcept(noexcept(_Tp())) : _Tp() { }
 
   template
_Hashtable_ebo_helper(_OtherTp&& __tp)
@@ -1188,7 +1188,7 @@ namespace __detail
   _Tp& _M_get() { return _M_tp; }
 
 private:
-  _Tp _M_tp;
+  _Tp _M_tp{};
 };
 
   /**
@@ -1246,6 +1246,7 @@ namespace __detail
   // We need the default constructor for the local iterators and _Hashtable
   // default constructor.
   _Hash_code_base() = default;
+
   _Hash_code_base(const _Hash& __hash) : __ebo_hash(__hash) { }
 
   __hash_code
@@ -1639,6 +1640,7 @@ namespace __detail
 
 protected:
   _Hashtable_base() = default;
+
   _Hashtable_base(const _Hash& __hash, const _Equal& __eq)
   : __hash_code_base(__hash), _EqualEBO(__eq)
   { }
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_map/allocator/default_init.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_map/allocator/default_init.cc
index bbfd683d433..12f1163951e 100644
--- 
a/libstdc++-v3/testsuite/23_containers/unordered_map/allocator/default_init.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/unordered_map/allocator/default_init.cc
@@ -17,7 +17,6 @@
 
 // { dg-do run { target c++11 } }
 // { dg-options "-O0" }
-// { dg-xfail-run-if "PR c++/65816" { *-*-* } }
 
 #include 
 #include 
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_set/allocator/default_init.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/allocator/default_init.cc
index 6ee32d45ceb..1ea6603024e 100644
--- 
a/libstdc++-v3/testsuite/23_containers/unordered_set/allocator/default_init.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/unordered_set/allocator/default_init.cc
@@ -17,7 +17,6 @@
 
 // { dg-do run { target c++11 } }
 // { dg-options "-O0" }
-// { dg-xfail-run-if "PR c++/65816" { *-*-* } }
 
 #include 
 #include 


Re: [PATCH] Hashtable PR96088

2021-06-02 Thread Jonathan Wakely via Gcc-patches

On 01/06/21 19:10 +0100, Jonathan Wakely wrote:

On 01/06/21 18:47 +0100, Jonathan Wakely wrote:

On 01/06/21 18:45 +0100, Jonathan Wakely wrote:

On 22/05/21 18:35 +0200, François Dumont wrote:

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
new file mode 100644
index 000..53bb754dab6
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -0,0 +1,271 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// libstdc++/96088
+
+#include <string_view>


This is a c++11 test, but it uses <string_view>.

The test fails for make check RUNTESTFLAGS=--target_board=unix/-std=gnu++11

I assume it should use { target c++17 } instead?


Same for 23_containers/unordered_map/96088.cc


I've pushed this fix.


And this one too.

Tested x86_64-linux.


commit 81eab204a56dcd8acb1ca5d7df277437ca07b51a
Author: Jonathan Wakely 
Date:   Wed Jun 2 12:33:38 2021

libstdc++: Fix tests for COW std::string [PR 96088]

The expected number of allocations is different when copying COW
strings.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

PR libstdc++/96088
* testsuite/23_containers/unordered_map/96088.cc: Adjust
expected number of allocations.
* testsuite/23_containers/unordered_set/96088.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
index e552b04f8c8..83ca1c0afd6 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
@@ -222,7 +222,8 @@ test03()
   std::vector> v;
   v.insert(v.end(), lst.begin(), lst.end());
 
-  auto __offset = __gnu_test::counter::count();
+  const auto origin = __gnu_test::counter::count();
+
   {
 __gnu_test::counter::reset();
 std::unordered_map v;
   v.insert(v.end(), lst.begin(), lst.end());
 
-  auto __offset = __gnu_test::counter::count();
+  const auto origin = __gnu_test::counter::count();
+
   {
 __gnu_test::counter::reset();
 std::unordered_set

Re: [patch] Tame fix for PR ipa/99122

2021-06-02 Thread Richard Biener via Gcc-patches
On Wed, Jun 2, 2021 at 2:05 PM Eric Botcazou  wrote:
>
> Hi,
>
> as explained in the audit trail, the return part has a major performance
> impact in Ada where variable-sized types are first-class citizens, but it
> turns out that it is not exercised in the testsuite yet.
>
> Tested on x86-64/Linux, OK for mainline and 11 branch?

Not sure whether we know VLA results are always returned by
reference?  In particular does this mean we'll never see a
WITH_SIZE_EXPR on the LHS of a call?  You might have noticed
I've done WITH_SIZE_EXPR "enhancements" recently on trunk.

As for the patch we indeed didn't have a testcase covering the
return case so it should be safe to revert the return part.

I still wonder about the WITH_SIZE_EXPR here ;)

Richard.

>
> 2021-06-02  Eric Botcazou  
>
> PR ipa/99122
> * tree-inline.c (inline_forbidden_p): Remove test on return type.
>
>
> 2021-06-02  Eric Botcazou  
>
> * gnat.dg/inline22.adb: New test.
>
> --
> Eric Botcazou


[patch] Tame fix for PR ipa/99122

2021-06-02 Thread Eric Botcazou
Hi,

as explained in the audit trail, the return part has a major performance 
impact in Ada where variable-sized types are first-class citizens, but it 
turns out that it is not exercised in the testsuite yet.

Tested on x86-64/Linux, OK for mainline and 11 branch?


2021-06-02  Eric Botcazou  

PR ipa/99122
* tree-inline.c (inline_forbidden_p): Remove test on return type.


2021-06-02  Eric Botcazou  

* gnat.dg/inline22.adb: New test.

-- 
Eric Botcazou
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index d38e8617e3d..cc7168614c0 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4004,17 +4004,11 @@ inline_forbidden_p (tree fndecl)
   wi.info = (void *) fndecl;
   wi.pset = &visited_nodes;
 
-  /* We cannot inline a function with a VLA typed argument or result since
- we have no implementation materializing a variable of such type in
- the caller.  */
-  if (COMPLETE_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl)))
-  && !poly_int_tree_p (TYPE_SIZE (TREE_TYPE (TREE_TYPE (fndecl)
-{
-  inline_forbidden_reason
-	= G_("function %q+F can never be inlined because "
-	 "it has a VLA return argument");
-  return true;
-}
+  /* We cannot inline a function with a variable-sized parameter because we
+ cannot materialize a temporary of such a type in the caller if need be.
+
+ Note that the return case is not symmetrical because we can guarantee
+ that a temporary is not needed by means of CALL_EXPR_RETURN_SLOT_OPT.  */
   for (tree parm = DECL_ARGUMENTS (fndecl); parm; parm = DECL_CHAIN (parm))
 if (!poly_int_tree_p (DECL_SIZE (parm)))
   {
-- { dg-compile }

procedure Inline22 (L, U : Integer) is

  type Arr is array (Integer range L .. U) of Boolean;

  function Get_Zero return Arr;
  pragma Inline_Always (Get_Zero);

  function Get_Zero return Arr is
  begin
return (others => False);
  end;

  A : Arr;

begin
  A := Get_Zero;
end;


Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-02 Thread Richard Biener via Gcc-patches
On Wed, Jun 2, 2021 at 1:15 PM Pat Haugen  wrote:
>
> On 6/2/21 1:51 AM, Richard Biener wrote:
> > On Tue, Jun 1, 2021 at 10:37 PM Pat Haugen via Gcc-patches
> >  wrote:
> >>
> >> Make sure link reg save MEM has frame alias set, to match other link reg
> >> save/restore code.
> >>
> >> Bootstrap/regtest on powerpc64/powerpc64le with no new regressions. Ok for
> >> trunk?
> >>
> >> -Pat
> >>
> >>
> >> 2021-06-01  Pat Haugen  
> >>
> >> gcc/ChangeLog:
> >>
> >> * config/rs6000/rs6000-logue.c (rs6000_emit_prologue): Use
> >> gen_frame_store.
> >>
> >>
> >>
> >> diff --git a/gcc/config/rs6000/rs6000-logue.c 
> >> b/gcc/config/rs6000/rs6000-logue.c
> >> index 13c00e740d6..07337c4836a 100644
> >> --- a/gcc/config/rs6000/rs6000-logue.c
> >> +++ b/gcc/config/rs6000/rs6000-logue.c
> >> @@ -3257,7 +3257,7 @@ rs6000_emit_prologue (void)
> >>if (!WORLD_SAVE_P (info) && info->lr_save_p
> >>&& !cfun->machine->lr_is_wrapped_separately)
> >>  {
> >> -  rtx addr, reg, mem;
> >> +  rtx reg;
> >>
> >>reg = gen_rtx_REG (Pmode, 0);
> >>START_USE (0);
> >> @@ -3267,13 +3267,8 @@ rs6000_emit_prologue (void)
> >>if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
> >> | SAVE_NOINLINE_FPRS_SAVES_LR)))
> >> {
> >> - addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
> >> -  GEN_INT (info->lr_save_offset + frame_off));
> >> - mem = gen_rtx_MEM (Pmode, addr);
> >> - /* This should not be of rs6000_sr_alias_set, because of
> >> -__builtin_return_address.  */
> >
> > I can't figure what this comment meant - did you?  Note the old code
> > looks like it would end up with alias-set zero and thus more conservative
> > than with using frame-alias-set so this is an optimization?
>
> No, I couldn't figure out the reasoning for the comment/code either. It's 
> been that way since it was introduced in March 2000 as part of the “Merge in 
> changes from newppc-branch.” patch. All other places where we save/restore 
> the link reg use a MEM with frame-alias-set. This change is an optimization 
> as you suspect in that it allows us to schedule non-aliased loads above the 
> link reg store (which couldn't happen before due to use of alias-set zero).

So did you check the RTL (and alias-sets) produced by
__builtin_return_address?  Test coverage might
be low here and w/o scheduling opportunities to break things.

Richard.

> -Pat


Re: [PATCH 2/4] [og11] Unify ARRAY_REF/INDIRECT_REF stripping code in extract_base_bit_offset

2021-06-02 Thread Richard Biener via Gcc-patches
On Wed, Jun 2, 2021 at 12:47 PM Julian Brown  wrote:
>
> For historical reasons, it seems that extract_base_bit_offset
> unnecessarily used two different ways to strip ARRAY_REF/INDIRECT_REF
> nodes from component accesses. I verified that the two ways of performing
> the operation gave the same results across the whole testsuite (and
> several additional benchmarks).

But the two code paths clearly do something different.  The base_ref case
allows (*p)[i] while the !base_ref does not because TREE_CODE (base)
!= COMPONENT_REF.
And the !base_ref case for INDIRECT_REF is quite odd, only allowing
*(x.p) where x.p is of REFERENCE_TYPE.

Whatever this code is supposed to do ... maybe the "prologue" should be inlined
at the two callers instead.

Richard.

> The code was like this since an earlier "mechanical" refactoring by me,
> first posted here:
>
>   https://gcc.gnu.org/pipermail/gcc-patches/2018-November/510503.html
>
> It was never clear to me if there was an important semantic
> difference between the two ways of stripping the base before calling
> get_inner_reference, but it appears that there is not, so one can go away.
>
> 2021-06-02  Julian Brown  
>
> gcc/
> * gimplify.c (extract_base_bit_offset): Unify ARRAY_REF/INDIRECT_REF
> stripping code in first call/subsequent call cases.
> ---
>  gcc/gimplify.c | 32 +++-
>  1 file changed, 11 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/gimplify.c b/gcc/gimplify.c
> index a38cd502aa5..255a2a648c1 100644
> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -8527,31 +8527,21 @@ extract_base_bit_offset (tree base, tree *base_ref, 
> poly_int64 *bitposp,
>poly_offset_int poffset;
>
>if (base_ref)
> -{
> -  *base_ref = NULL_TREE;
> -
> -  while (TREE_CODE (base) == ARRAY_REF)
> -   base = TREE_OPERAND (base, 0);
> +*base_ref = NULL_TREE;
>
> -  if (TREE_CODE (base) == INDIRECT_REF)
> -   base = TREE_OPERAND (base, 0);
> -}
> -  else
> +  if (TREE_CODE (base) == ARRAY_REF)
>  {
> -  if (TREE_CODE (base) == ARRAY_REF)
> -   {
> - while (TREE_CODE (base) == ARRAY_REF)
> -   base = TREE_OPERAND (base, 0);
> - if (TREE_CODE (base) != COMPONENT_REF
> - || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
> -   return NULL_TREE;
> -   }
> -  else if (TREE_CODE (base) == INDIRECT_REF
> -  && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
> -  && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
> -  == REFERENCE_TYPE))
> +  while (TREE_CODE (base) == ARRAY_REF)
> base = TREE_OPERAND (base, 0);
> +  if (TREE_CODE (base) != COMPONENT_REF
> + || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
> +   return NULL_TREE;
>  }
> +  else if (TREE_CODE (base) == INDIRECT_REF
> +  && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
> +  && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
> +  == REFERENCE_TYPE))
> +base = TREE_OPERAND (base, 0);
>
>base = get_inner_reference (base, &bitsize, &bitpos, &offset, &mode,
>   &unsignedp, &reversep, &volatilep);
> --
> 2.29.2
>


Re: [RFC/PATCH] updating global ranges and their effect on __builtin_unreachable code

2021-06-02 Thread Richard Biener via Gcc-patches
On Wed, Jun 2, 2021 at 12:34 PM Aldy Hernandez via Gcc-patches
 wrote:
>
> We've been having "issues" in our branch when exporting to the global
> space ranges that take into account previously known ranges
> (SSA_NAME_RANGE_INFO, etc).  For the longest time we had the export
> feature turned off because it had the potential of removing
> __builtin_unreachable code early in the pipeline.  This was causing one
> or two tests to fail.
>
> I finally got fed up, and investigated why.
>
> Take the following code:
>
>i_4 = somerandom ();
>if (i_4 < 0)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
>__builtin_unreachable ();
>
> :
>
> It turns out that both legacy evrp and VRP have code that notices the
> above pattern and sets the *global* range for i_4 to [0,MAX].  That is,
> the range for i_4 is set, not at BB4, but at the definition site.  See
> uses of assert_unreachable_fallthru_edge_p() for details.
>
> This global range causes subsequent passes (VRP1 in the testcase below),
> to remove the checks and the __builtin_unreachable code altogether.
>
> // pr80776-1.c
> int somerandom (void);
> void
> Foo (void)
> {
>int i = somerandom ();
>if (! (0 <= i))
>  __builtin_unreachable ();
>if (! (0 <= i && i <= 99))
>  __builtin_unreachable ();
>sprintf (number, "%d", i);
> }
>
> This means that by the time the -Wformat-overflow warning runs, the
> above sprintf has been left unguarded, and a bogus warning is issued.
>
> Currently the above test does not warn, but that's because of an
> oversight in export_global_ranges().  This function is disregarding
> known global ranges (SSA_NAME_RANGE_INFO and SSA_NAME_PTR_INFO) and only
> setting ranges the ranger knows about.
>
> For the above test the IL is:
>
> :
>i_4 = somerandom ();
>if (i_4 < 0)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
>__builtin_unreachable ();
>
> :
>i.0_1 = (unsigned int) i_4;
>if (i.0_1 > 99)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
>__builtin_unreachable ();
>
> :
>_7 = __builtin___sprintf_chk (, 1, 7, "%d", i_4);
>
>
> Legacy evrp has determined that the range for i_4 is [0,MAX] per my
> analysis above, but ranger has no known range for i_4 at the definition
> site.  So at export_global_ranges time, ranger leaves the [0,MAX] alone.
>
> OTOH, evrp sets the global range at the definition for i.0_1 to
> [0,99] per the same unreachable feature.  However, ranger has
> correctly determined that the range for i.0_1 at the definition is
> [0,MAX], which it then proceeds to export.  Since the current
> export_global_ranges (mistakenly) does not take into account previous
> global ranges, the ranges in the global tables end up like this:
>
> i_4: [0, MAX]
> i.0_1: [0, MAX]
>
> This causes the first unreachable block to be removed in VRP1, but the
> second one to remain.  Later VRP can determine that i_4 in the sprintf
> call is [0,99], and no warning is issued.
>
> But... the missing bogus warning is due to current export_global_ranges
> ignoring SSA_NAME_RANGE_INFO and friends, something which I'd like to
> fix.  However, fixing this, gets us back to:
>
> i_4: [0, MAX]
> i.0_1: [0, 99]
>
> Which means, we'll be back to removing the unreachable blocks and
> issuing a warning in pr80776-1.c (like we have been since the beginning
> of time).
>
> The attached patch fixes export_global_ranges to the expected behavior,
> and adds the previous XFAIL to pr80776-1.c, while documenting why this
> warning is issued in the first place.
>
> Once legacy evrp is removed, this won't be an issue, as ranges in the IL
> will tell the truth.  However, this will mean that we will no longer
> remove the first __builtin_unreachable combo.  But ISTM, that would be
> correct behavior ??.
>
> BTW, in addition to this patch we could explore removing the
> assert_unreachable_fallthru_edge_p() use in the evrp_analyzer, since it
> is no longer needed to get the warnings in the testcases in the original
> PR correctly (gcc.dg/pr80776-[12].c).

But the whole point of all this singing and dancing is not to make
warnings but to be able to implement assert (); or assume (); that
will result in no code but optimization based on the assumption.

That means that all the checks guarding __builtin_unreachable ()
should be removed at the GIMPLE level - just not too early
to preserve range info on the variables participating in the
guarding condition.

So yes, it sounds fragile but instead it's carefully architected.  Heh.

In particular it is designed so that early optimization leaves those
unreachable () around (for later LTO consumption and inlining, etc.
to be able to re-create the ranges) whilst VRP1 / DOM will end up
eliminating them.  I think we have testcases that verify said behavior,
namely optimize out range checks based on the assertions - maybe missed
the case where this only happens after inlining (important for your friendly
C++ 

[committed] libstdc++: Improve punctuation in implementation status docs

2021-06-02 Thread Jonathan Wakely via Gcc-patches
Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* doc/xml/manual/status_cxxis29124.xml: Improve punctuation.
* doc/xml/manual/status_cxxtr1.xml: Likewise.
* doc/xml/manual/status_cxxtr24733.xml: Likewise.
* doc/html/*: Regenerate.

Committed to trunk.

commit ca35586cf5cf1617294d5452df8ca5285d81646f
Author: Jonathan Wakely 
Date:   Wed Jun 2 12:31:04 2021

libstdc++: Improve punctuation in implementation status docs

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* doc/xml/manual/status_cxxis29124.xml: Improve punctuation.
* doc/xml/manual/status_cxxtr1.xml: Likewise.
* doc/xml/manual/status_cxxtr24733.xml: Likewise.
* doc/html/*: Regenerate.

diff --git a/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml 
b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml
index 40a90fc9944..2d9baa1ab68 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml
@@ -10,9 +10,9 @@
 
 
 
-This table is based on the table of contents of ISO/IEC FDIS 29124
-Doc No: N3060 Date: 2010-03-06
-Extensions to the C++ Library to support mathematical special functions
+This table is based on the table of contents of ISO/IEC FDIS 29124,
+Doc No: N3060, Date: 2010-03-06,
+"Extensions to the C++ Library to support mathematical special functions".
 
 
 
diff --git a/libstdc++-v3/doc/xml/manual/status_cxxtr1.xml 
b/libstdc++-v3/doc/xml/manual/status_cxxtr1.xml
index 021cb6394a7..addfd1f2d86 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxxtr1.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxxtr1.xml
@@ -10,9 +10,9 @@
 
 
 
-This table is based on the table of contents of ISO/IEC DTR 19768
-Doc No: N1836=05-0096 Date: 2005-06-24
-Draft Technical Report on C++ Library Extensions
+This table is based on the table of contents of ISO/IEC DTR 19768,
+Doc No: N1836=05-0096, Date: 2005-06-24,
+"Draft Technical Report on C++ Library Extensions".
 
 
 
diff --git a/libstdc++-v3/doc/xml/manual/status_cxxtr24733.xml 
b/libstdc++-v3/doc/xml/manual/status_cxxtr24733.xml
index 139b94442e8..f64dc983c6e 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxxtr24733.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxxtr24733.xml
@@ -5,14 +5,15 @@
 C++ TR 24733
   
 TR 24733
+decimal floating-point
   
 
 
 
 This table is based on the table of contents of
-ISO/IEC TR 24733 Date: 2009-08-28
-Extension for the programming language C++ to support
-decimal floating-point arithmetic
+ISO/IEC TR 24733:2011,
+"Extensions for the programming language C++ to support
+decimal floating-point arithmetic".
 
 
 


Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-02 Thread Pat Haugen via Gcc-patches
On 6/2/21 1:51 AM, Richard Biener wrote:
> On Tue, Jun 1, 2021 at 10:37 PM Pat Haugen via Gcc-patches
>  wrote:
>>
>> Make sure link reg save MEM has frame alias set, to match other link reg
>> save/restore code.
>>
>> Bootstrap/regtest on powerpc64/powerpc64le with no new regressions. Ok for
>> trunk?
>>
>> -Pat
>>
>>
>> 2021-06-01  Pat Haugen  
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000-logue.c (rs6000_emit_prologue): Use
>> gen_frame_store.
>>
>>
>>
>> diff --git a/gcc/config/rs6000/rs6000-logue.c 
>> b/gcc/config/rs6000/rs6000-logue.c
>> index 13c00e740d6..07337c4836a 100644
>> --- a/gcc/config/rs6000/rs6000-logue.c
>> +++ b/gcc/config/rs6000/rs6000-logue.c
>> @@ -3257,7 +3257,7 @@ rs6000_emit_prologue (void)
>>if (!WORLD_SAVE_P (info) && info->lr_save_p
>>&& !cfun->machine->lr_is_wrapped_separately)
>>  {
>> -  rtx addr, reg, mem;
>> +  rtx reg;
>>
>>reg = gen_rtx_REG (Pmode, 0);
>>START_USE (0);
>> @@ -3267,13 +3267,8 @@ rs6000_emit_prologue (void)
>>if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
>> | SAVE_NOINLINE_FPRS_SAVES_LR)))
>> {
>> - addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
>> -  GEN_INT (info->lr_save_offset + frame_off));
>> - mem = gen_rtx_MEM (Pmode, addr);
>> - /* This should not be of rs6000_sr_alias_set, because of
>> -__builtin_return_address.  */
> 
> I can't figure what this comment meant - did you?  Note the old code
> looks like it would end up with alias-set zero and thus more conservative
> than with using frame-alias-set so this is an optimization?

No, I couldn't figure out the reasoning for the comment/code either. It's been 
that way since it was introduced in March 2000 as part of the “Merge in changes 
from newppc-branch.” patch. All other places where we save/restore the link reg 
use a MEM with frame-alias-set. This change is an optimization as you suspect 
in that it allows us to schedule non-aliased loads above the link reg store 
(which couldn't happen before due to use of alias-set zero).

-Pat


[PATCH 4/4] [og11] Rework indirect struct handling for OpenACC in gimplify.c

2021-06-02 Thread Julian Brown
This patch reworks indirect struct handling in gimplify.c (i.e. for
struct components mapped with "mystruct->a[0:n]", "mystruct->b", etc.),
for OpenACC.  The key observation leading to these changes was that
component mapping of references-to-structures is already implemented
and working, and indirect struct component handling via a pointer can
work quite similarly.  That lets us remove some earlier, special-case
handling for mapping indirect struct component accesses for OpenACC,
which required the pointed-to struct to be manually mapped before the
indirect component mapping.

With this patch, you can map struct components directly (e.g. an array
slice "mystruct->a[0:n]") just like you can map a non-indirect struct
component slice ("mystruct.a[0:n]"). Both references-to-pointers (with
the former syntax) and references to structs (with the latter syntax)
work now.

For Fortran class pointers, we no longer re-use GOMP_MAP_TO_PSET for the
class metadata (the structure that points to the class data and vptr)
-- it is instead treated as any other struct.

For C++, the struct handling also works for class members ("this->foo"),
without having to explicitly map "this[:1]" first.

For OpenACC, we permit chained indirect component references
("mystruct->a->b[0:n]"), though only the last part of such mappings will
trigger an attach/detach operation.  To properly use such a construct
on the target, you must still manually map "mystruct->a[:1]" first --
but there's no need to map "mystruct[:1]" explicitly before that.

This version of the patch avoids altering code paths for OpenMP,
where possible.

2021-06-02  Julian Brown  

gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Don't create GOMP_MAP_TO_PSET
mappings for class metadata, nor GOMP_MAP_POINTER mappings for
POINTER_TYPE_P decls.

gcc/
* gimplify.c (extract_base_bit_offset): Add BASE_IND and OPENMP
parameters.  Handle pointer-typed indirect references for OpenACC
alongside reference-typed ones.
(strip_components_and_deref, aggregate_base_p): New functions.
(build_struct_group): Add pointer type indirect ref handling,
including chained references, for OpenACC.  Also handle references to
structs for OpenACC.  Conditionalise bits for OpenMP only where
appropriate.
(gimplify_scan_omp_clauses): Rework pointer-type indirect structure
access handling to work more like the reference-typed handling for
OpenACC only.
* omp-low.c (scan_sharing_clauses): Handle pointer-type indirect struct
references, and references to pointers to structs also.

gcc/testsuite/
* g++.dg/goacc/member-array-acc.C: New test.
* g++.dg/gomp/member-array-omp.C: New test.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c: New test.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c: New test.
* testsuite/libgomp.oacc-c++/deep-copy-17.C: New test.
---
 gcc/fortran/trans-openmp.c|  20 +-
 gcc/gimplify.c| 215 +---
 gcc/omp-low.c |  16 +-
 gcc/testsuite/g++.dg/goacc/member-array-acc.C |  13 +
 gcc/testsuite/g++.dg/gomp/member-array-omp.C  |  13 +
 .../testsuite/libgomp.oacc-c++/deep-copy-17.C | 101 
 .../libgomp.oacc-c-c++-common/deep-copy-15.c  |  68 ++
 .../libgomp.oacc-c-c++-common/deep-copy-16.c  | 231 ++
 8 files changed, 619 insertions(+), 58 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/goacc/member-array-acc.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/member-array-omp.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c++/deep-copy-17.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 9540b7bc161..28fd790d7ad 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3012,30 +3012,16 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  tree present = gfc_omp_check_optional_argument (decl, true);
  if (openacc && n->sym->ts.type == BT_CLASS)
{
- tree type = TREE_TYPE (decl);
  if (n->sym->attr.optional)
sorry ("optional class parameter");
- if (POINTER_TYPE_P (type))
-   {
- node4 = build_omp_clause (input_location,
-   OMP_CLAUSE_MAP);
- OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
- OMP_CLAUSE_DECL (node4) = decl;
- OMP_CLAUSE_SIZE (node4) = size_int (0);
- decl = build_fold_indirect_ref (decl);
-   }
   

[PATCH 3/4] [og11] Refactor struct lowering for OpenACC/OpenMP in gimplify.c

2021-06-02 Thread Julian Brown
This patch is a second attempt at refactoring struct component mapping
handling for OpenACC/OpenMP during gimplification, after the patch I
posted here:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-November/510503.html

And improved here, post-review:

  https://gcc.gnu.org/pipermail/gcc-patches/2019-November/533394.html

This patch goes further, in that the struct-handling code is outlined
into its own function (to create the "GOMP_MAP_STRUCT" node and the
sorted list of nodes immediately following it, from a set of mappings
of components of a given struct or derived type). I've also gone through
the list-handling code and attempted to add comments documenting how it
works to the best of my understanding, and broken out a couple of helper
functions in order to (hopefully) have the code self-document better also.

2021-06-02  Julian Brown  

gcc/
* gimplify.c (insert_struct_comp_map): Refactor function into...
(build_struct_comp_nodes): This new function.  Remove list handling
and improve self-documentation.
(insert_node_after, move_node_after, move_nodes_after,
move_concat_nodes_after): New helper functions.
(build_struct_group): New function to build up GOMP_MAP_STRUCT node
groups to map struct components. Outlined from...
(gimplify_scan_omp_clauses): Here.  Call above function.
---
 gcc/gimplify.c | 934 +++--
 1 file changed, 591 insertions(+), 343 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 255a2a648c1..5b11f292456 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8440,73 +8440,66 @@ gimplify_omp_depend (tree *list_p, gimple_seq *pre_p)
   return 1;
 }
 
-/* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a
-   GOMP_MAP_STRUCT mapping.  C is an always_pointer mapping.  STRUCT_NODE is
-   the struct node to insert the new mapping after (when the struct node is
-   initially created).  PREV_NODE is the first of two or three mappings for a
-   pointer, and is either:
- - the node before C, when a pair of mappings is used, e.g. for a C/C++
-   array section.
- - not the node before C.  This is true when we have a reference-to-pointer
-   type (with a mapping for the reference and for the pointer), or for
-   Fortran derived-type mappings with a GOMP_MAP_TO_PSET.
-   If SCP is non-null, the new node is inserted before *SCP.
-   if SCP is null, the new node is inserted before PREV_NODE.
-   The return type is:
- - PREV_NODE, if SCP is non-null.
- - The newly-created ALLOC or RELEASE node, if SCP is null.
- - The second newly-created ALLOC or RELEASE node, if we are mapping a
-   reference to a pointer.  */
+/* For a set of mappings describing an array section pointed to by a struct
+   (or derived type, etc.) component, create an "alloc" or "release" node to
+   insert into a list following a GOMP_MAP_STRUCT node.  For some types of
+   mapping (e.g. Fortran arrays with descriptors), an additional mapping may
+   be created that is inserted into the list of mapping nodes attached to the
+   directive being processed -- not part of the sorted list of nodes after
+   GOMP_MAP_STRUCT.
+
+   CODE is the code of the directive being processed.  GRP_START and GRP_END
+   are the first and last of two or three nodes representing this array section
+   mapping (e.g. a data movement node like GOMP_MAP_{TO,FROM}, optionally a
+   GOMP_MAP_TO_PSET, and finally a GOMP_MAP_ALWAYS_POINTER).  EXTRA_NODE is
+   filled with the additional node described above, if needed.
+
+   This function does not add the new nodes to any lists itself.  It is the
+   responsibility of the caller to do that.  */
 
 static tree
-insert_struct_comp_map (enum tree_code code, tree c, tree struct_node,
-   tree prev_node, tree *scp)
+build_struct_comp_nodes (enum tree_code code, tree grp_start, tree grp_end,
+tree *extra_node)
 {
   enum gomp_map_kind mkind
 = (code == OMP_TARGET_EXIT_DATA || code == OACC_EXIT_DATA)
   ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC;
 
-  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
-  tree cl = scp ? prev_node : c2;
+  gcc_assert (grp_start != grp_end);
+
+  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP);
   OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
-  OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c));
-  OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node;
-  if (OMP_CLAUSE_CHAIN (prev_node) != c
-  && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP
-  && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node))
- == GOMP_MAP_TO_PSET))
-OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (OMP_CLAUSE_CHAIN (prev_node));
+  OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (grp_end));
+  OMP_CLAUSE_CHAIN (c2) = NULL_TREE;
+  tree grp_mid = NULL_TREE;
+  if (OMP_CLAUSE_CHAIN (grp_start) != grp_end)
+grp_mid = 

[PATCH 2/4] [og11] Unify ARRAY_REF/INDIRECT_REF stripping code in extract_base_bit_offset

2021-06-02 Thread Julian Brown
For historical reasons, it seems that extract_base_bit_offset
unnecessarily used two different ways to strip ARRAY_REF/INDIRECT_REF
nodes from component accesses. I verified that the two ways of performing
the operation gave the same results across the whole testsuite (and
several additional benchmarks).

The code was like this since an earlier "mechanical" refactoring by me,
first posted here:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-November/510503.html

It was never clear to me if there was an important semantic
difference between the two ways of stripping the base before calling
get_inner_reference, but it appears that there is not, so one can go away.

2021-06-02  Julian Brown  

gcc/
* gimplify.c (extract_base_bit_offset): Unify ARRAY_REF/INDIRECT_REF
stripping code in first call/subsequent call cases.
---
 gcc/gimplify.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index a38cd502aa5..255a2a648c1 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8527,31 +8527,21 @@ extract_base_bit_offset (tree base, tree *base_ref, 
poly_int64 *bitposp,
   poly_offset_int poffset;
 
   if (base_ref)
-{
-  *base_ref = NULL_TREE;
-
-  while (TREE_CODE (base) == ARRAY_REF)
-   base = TREE_OPERAND (base, 0);
+*base_ref = NULL_TREE;
 
-  if (TREE_CODE (base) == INDIRECT_REF)
-   base = TREE_OPERAND (base, 0);
-}
-  else
+  if (TREE_CODE (base) == ARRAY_REF)
 {
-  if (TREE_CODE (base) == ARRAY_REF)
-   {
- while (TREE_CODE (base) == ARRAY_REF)
-   base = TREE_OPERAND (base, 0);
- if (TREE_CODE (base) != COMPONENT_REF
- || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
-   return NULL_TREE;
-   }
-  else if (TREE_CODE (base) == INDIRECT_REF
-  && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
-  && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
-  == REFERENCE_TYPE))
+  while (TREE_CODE (base) == ARRAY_REF)
base = TREE_OPERAND (base, 0);
+  if (TREE_CODE (base) != COMPONENT_REF
+ || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
+   return NULL_TREE;
 }
+  else if (TREE_CODE (base) == INDIRECT_REF
+  && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
+  && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
+  == REFERENCE_TYPE))
+base = TREE_OPERAND (base, 0);
 
   base = get_inner_reference (base, , , , ,
  , , );
-- 
2.29.2



[PATCH 1/4] [og11] Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally

2021-06-02 Thread Julian Brown
It never makes sense for a GOMP_MAP_ATTACH_DETACH mapping to survive
beyond gimplify.c, so this patch rewrites such mappings to GOMP_MAP_ATTACH
or GOMP_MAP_DETACH unconditionally (rather than checking for a list
of types of OpenACC or OpenMP constructs), in cases where it hasn't
otherwise been done already in the preceding code.

2021-06-02  Julian Brown  

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Simplify condition
for changing GOMP_MAP_ATTACH_DETACH to GOMP_MAP_ATTACH or
GOMP_MAP_DETACH.
---
 gcc/gimplify.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 5b481b99953..a38cd502aa5 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -9871,15 +9871,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
skip_map_struct:
  ;
}
- else if ((code == OACC_ENTER_DATA
-   || code == OACC_EXIT_DATA
-   || code == OACC_DATA
-   || code == OACC_PARALLEL
-   || code == OACC_KERNELS
-   || code == OACC_SERIAL
-   || code == OMP_TARGET_ENTER_DATA
-   || code == OMP_TARGET_EXIT_DATA)
-  && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)
+ else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)
{
  gomp_map_kind k = ((code == OACC_EXIT_DATA
  || code == OMP_TARGET_EXIT_DATA)
-- 
2.29.2



[PATCH 0/4] [og11] OpenACC: Rework struct component handling

2021-06-02 Thread Julian Brown
This is a merge to the og11 branch of the patch series posted for
mainline here:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570396.html

and for the og10 branch here:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570810.html

Re-tested with offloading to NVPTX. I will push to the og11 branch
shortly.

Thanks,

Julian

Julian Brown (4):
  [og11] Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally
  [og11] Unify ARRAY_REF/INDIRECT_REF stripping code in
extract_base_bit_offset
  [og11] Refactor struct lowering for OpenACC/OpenMP in gimplify.c
  [og11] Rework indirect struct handling for OpenACC in gimplify.c

 gcc/fortran/trans-openmp.c|   20 +-
 gcc/gimplify.c| 1151 +++--
 gcc/omp-low.c |   16 +-
 gcc/testsuite/g++.dg/goacc/member-array-acc.C |   13 +
 gcc/testsuite/g++.dg/gomp/member-array-omp.C  |   13 +
 .../testsuite/libgomp.oacc-c++/deep-copy-17.C |  101 ++
 .../libgomp.oacc-c-c++-common/deep-copy-15.c  |   68 +
 .../libgomp.oacc-c-c++-common/deep-copy-16.c  |  231 
 8 files changed, 1202 insertions(+), 411 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/goacc/member-array-acc.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/member-array-omp.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c++/deep-copy-17.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c

-- 
2.29.2



[RFC/PATCH] updating global ranges and their effect on __builtin_unreachable code

2021-06-02 Thread Aldy Hernandez via Gcc-patches
We've been having "issues" in our branch when exporting to the global 
space ranges that take into account previously known ranges 
(SSA_NAME_RANGE_INFO, etc).  For the longest time we had the export 
feature turned off because it had the potential of removing 
__builtin_unreachable code early in the pipeline.  This was causing one 
or two tests to fail.


I finally got fed up, and investigated why.

Take the following code:

  i_4 = somerandom ();
  if (i_4 < 0)
    goto <bb 3>; [INV]
  else
    goto <bb 4>; [INV]

  <bb 3> :
  __builtin_unreachable ();

  <bb 4> :

It turns out that both legacy evrp and VRP have code that notices the 
above pattern and sets the *global* range for i_4 to [0,MAX].  That is, 
the range for i_4 is set, not at BB4, but at the definition site.  See 
uses of assert_unreachable_fallthru_edge_p() for details.


This global range causes subsequent passes (VRP1 in the testcase below), 
to remove the checks and the __builtin_unreachable code altogether.


// pr80776-1.c
int somerandom (void);
void
Foo (void)
{
  int i = somerandom ();
  if (! (0 <= i))
__builtin_unreachable ();
  if (! (0 <= i && i <= 99))
__builtin_unreachable ();
  sprintf (number, "%d", i);
}

This means that by the time the -Wformat-overflow warning runs, the 
above sprintf has been left unguarded, and a bogus warning is issued.


Currently the above test does not warn, but that's because of an 
oversight in export_global_ranges().  This function is disregarding 
known global ranges (SSA_NAME_RANGE_INFO and SSA_NAME_PTR_INFO) and only 
setting ranges the ranger knows about.


For the above test the IL is:

  <bb 2> :
  i_4 = somerandom ();
  if (i_4 < 0)
    goto <bb 3>; [INV]
  else
    goto <bb 4>; [INV]

  <bb 3> :
  __builtin_unreachable ();

  <bb 4> :
  i.0_1 = (unsigned int) i_4;
  if (i.0_1 > 99)
    goto <bb 5>; [INV]
  else
    goto <bb 6>; [INV]

  <bb 5> :
  __builtin_unreachable ();

  <bb 6> :
  _7 = __builtin___sprintf_chk (&number, 1, 7, "%d", i_4);


Legacy evrp has determined that the range for i_4 is [0,MAX] per my 
analysis above, but ranger has no known range for i_4 at the definition 
site.  So at export_global_ranges time, ranger leaves the [0,MAX] alone.


OTOH, evrp sets the global range at the definition for i.0_1 to 
[0,99] per the same unreachable feature.  However, ranger has 
correctly determined that the range for i.0_1 at the definition is 
[0,MAX], which it then proceeds to export.  Since the current 
export_global_ranges (mistakenly) does not take into account previous 
global ranges, the ranges in the global tables end up like this:


i_4: [0, MAX]
i.0_1: [0, MAX]

This causes the first unreachable block to be removed in VRP1, but the 
second one to remain.  Later VRP can determine that i_4 in the sprintf 
call is [0,99], and no warning is issued.


But... the missing bogus warning is due to current export_global_ranges 
ignoring SSA_NAME_RANGE_INFO and friends, something which I'd like to 
fix.  However, fixing this, gets us back to:


i_4: [0, MAX]
i.0_1: [0, 99]

Which means, we'll be back to removing the unreachable blocks and 
issuing a warning in pr80776-1.c (like we have been since the beginning 
of time).


The attached patch fixes export_global_ranges to the expected behavior, 
and adds the previous XFAIL to pr80776-1.c, while documenting why this 
warning is issued in the first place.


Once legacy evrp is removed, this won't be an issue, as ranges in the IL 
will tell the truth.  However, this will mean that we will no longer 
remove the first __builtin_unreachable combo.  But ISTM, that would be 
correct behavior ??.


BTW, in addition to this patch we could explore removing the 
assert_unreachable_fallthru_edge_p() use in the evrp_analyzer, since it 
is no longer needed to get the warnings in the testcases in the original 
PR correctly (gcc.dg/pr80776-[12].c).


Tested on x86-64 Linux.

OK?

Aldy
>From 36684dde843a4c9556b97bf030cabef8b9430aa4 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Tue, 1 Jun 2021 17:48:30 +0200
Subject: [PATCH 2/2] Use known global ranges in export_global_ranges

This patch modifies export_global_ranges to take into account current
global ranges.  It also enhances said function to export pointer
global ranges as well.

gcc/ChangeLog:

	* gimple-range.cc (gimple_ranger::export_global_ranges): Call
	  update_global_range.
	* value-query.cc (update_global_range): New.
	* value-query.h (update_global_range): New.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr80776-1.c: XFAIL and document the reason why.
---
 gcc/gimple-range.cc  | 26 -
 gcc/testsuite/gcc.dg/pr80776-1.c | 12 +-
 gcc/value-query.cc   | 39 
 gcc/value-query.h|  1 +
 4 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
index ed0a0c9702b..af426207092 100644
--- a/gcc/gimple-range.cc
+++ b/gcc/gimple-range.cc
@@ -1115,7 +1115,7 @@ gimple_ranger::range_of_stmt (irange , gimple *s, 

[PATCH] AArch64: Improve address rematerialization costs

2021-06-02 Thread Wilco Dijkstra via Gcc-patches
Hi,

Given the large improvements from better register allocation of GOT accesses,
I decided to generalize it to get large gains for normal addressing too:

Improve rematerialization costs of addresses.  The current costs are set too 
high
which results in extra register pressure and spilling.  Using lower costs means
addresses will be rematerialized more often rather than being spilled or causing
spills.  This results in significant codesize reductions and performance gains.
SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 0.12%
smaller.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-01  Wilco Dijkstra  

* config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
rematerialization
costs for HIGH, LO_SUM and SYMBOL_REF.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
641c83b479e76cbcc75b299eb7ae5f634d9db7cd..08245827daa3f8199b29031e754244c078f0f500
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13444,45 +13444,22 @@ cost_plus:
  return false;  /* All arguments need to be in registers.  */
}
 
-case SYMBOL_REF:
+/* The following costs are used for rematerialization of addresses.
+   Set a low cost for all global accesses - this ensures they are
+   preferred for rematerialization, blocks them from being spilled
+   and reduces register pressure.  The result is significant codesize
+   reductions and performance gains. */
 
-  if (aarch64_cmodel == AARCH64_CMODEL_LARGE
- || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
-   {
- /* LDR.  */
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-   {
- /* ADRP, followed by ADD.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += 2 * extra_cost->alu.arith;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-   {
- /* ADR.  */
- if (speed)
-   *cost += extra_cost->alu.arith;
-   }
-
-  if (flag_pic)
-   {
- /* One extra load instruction, after accessing the GOT.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
+case SYMBOL_REF:
+  *cost = 0;
   return true;
 
 case HIGH:
+  *cost = 0;
+  return true;
+
 case LO_SUM:
-  /* ADRP/ADD (immediate).  */
-  if (speed)
-   *cost += extra_cost->alu.arith;
+  *cost = COSTS_N_INSNS (3) / 4;
   return true;
 
 case ZERO_EXTRACT:



Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Richard Biener via Gcc-patches
On Wed, Jun 2, 2021 at 12:01 PM Kewen.Lin  wrote:
>
> on 2021/6/2 下午5:13, Richard Sandiford wrote:
> > "Kewen.Lin"  writes:
> >> Hi Richard,
> >>
> >> on 2021/6/2 下午4:11, Richard Sandiford wrote:
> >>> Kewen Lin  writes:
>  Hi all,
> 
>  define_insn_and_split should avoid to use empty split condition
>  if the condition for define_insn isn't empty, otherwise it can
>  sometimes result in unexpected consequence, since the split
>  will always be done even if the insn condition doesn't hold.
> 
>  To avoid forgetting to add "&& 1" onto split condition, as
>  Segher suggested in thread[1], this series is to add the check
>  and raise an error if it catches the unexpected cases.  With
>  this new check, we have to fix up some existing
>  define_insn_and_split which are detected as error.  I hope all
>  these places are not intentional to be kept as blank.
> >>>
> >>> I wonder whether we should instead redefine the semantics of
> >>> define_insn_and_split so that the split condition is always applied
> >>> on top of the insn condition.  It's rare for a define_insn_and_split
> >>> to have independent insn and split conditions, so at the moment,
> >>> we're making the common case hard.
> >>>
> >>
> >> Just want to confirm that the suggestion is just applied for empty
> >> split condition or all split conditions in define_insn_and_split?
> >> I guess it's the former?
> >
> > No, I meant the latter.  E.g. in:
> >
> > (define_insn_and_split
> >   […]
> >   "TARGET_FOO"
> >   "…"
> >   […]
> >   "reload_completed"
> >   […]
> > )
> >
> > the "reload_completed" condition is almost always a typo for
> > "&& reload_completed".
> >
> > Like I say, it rarely makes sense for the split condition to
> > ignore the insn condition and specify an entirely independent condition.
> > There might be some define_insn_and_splits that do that, but it'd often
> > be less confusing to write the insn and split separately if so.
> >
> > Even if we do want to support independent insn and split conditions,
> > that's always going to be the rare and surprising case, so it's the one
> > that should need extra syntax.
> >
>
> Thanks for the clarification!
>
> Since it may impact all ports, I wonder if there is a way to find out
> this kind of "rare and surprising" case without a big coverage testing?
> I'm happy to make a draft patch for it, but not sure how to early catch
> those cases which need to be rewritten for those ports that I can't test
> on (even with cfarm machines, the coverage seems still limited).

So what Richard suggests would be to disallow split conditions
that do not start with "&& ", it's probably easy to do that as well
and look for build fails.  That should catch all cases to look at.

Richard.

> BR,
> Kewen
>


Re: [RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-02 Thread Kewen.Lin via Gcc-patches
on 2021/6/2 下午5:13, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> Hi Richard,
>>
>> on 2021/6/2 下午4:11, Richard Sandiford wrote:
>>> Kewen Lin  writes:
 Hi all,

 define_insn_and_split should avoid to use empty split condition
 if the condition for define_insn isn't empty, otherwise it can
 sometimes result in unexpected consequence, since the split
 will always be done even if the insn condition doesn't hold.

 To avoid forgetting to add "&& 1" onto split condition, as
 Segher suggested in thread[1], this series is to add the check
 and raise an error if it catches the unexpected cases.  With
 this new check, we have to fix up some existing
 define_insn_and_split which are detected as error.  I hope all
 these places are not intentional to be kept as blank.
>>>
>>> I wonder whether we should instead redefine the semantics of
>>> define_insn_and_split so that the split condition is always applied
>>> on top of the insn condition.  It's rare for a define_insn_and_split
>>> to have independent insn and split conditions, so at the moment,
>>> we're making the common case hard.
>>>
>>
>> Just want to confirm that the suggestion is just applied for empty
>> split condition or all split conditions in define_insn_and_split? 
>> I guess it's the former?
> 
> No, I meant the latter.  E.g. in:
> 
> (define_insn_and_split
>   […]
>   "TARGET_FOO"
>   "…"
>   […]
>   "reload_completed"
>   […]
> )
> 
> the "reload_completed" condition is almost always a typo for
> "&& reload_completed".
> 
> Like I say, it rarely makes sense for the split condition to
> ignore the insn condition and specify an entirely independent condition.
> There might be some define_insn_and_splits that do that, but it'd often
> be less confusing to write the insn and split separately if so.
> 
> Even if we do want to support independent insn and split conditions,
> that's always going to be the rare and surprising case, so it's the one
> that should need extra syntax.
> 

Thanks for the clarification!

Since it may impact all ports, I wonder if there is a way to find out
this kind of "rare and surprising" case without a big coverage testing?
I'm happy to make a draft patch for it, but not sure how to early catch
those cases which need to be rewritten for those ports that I can't test
on (even with cfarm machines, the coverage seems still limited).

BR,
Kewen



RE: [PATCH][AArch32]: Correct sdot RTL on aarch32

2021-06-02 Thread Kyrylo Tkachov via Gcc-patches


> -Original Message-
> From: Tamar Christina 
> Sent: 02 June 2021 10:34
> To: Tamar Christina 
> Cc: Richard Earnshaw ; nd ;
> Ramana Radhakrishnan ; Kyrylo
> Tkachov 
> Subject: RE: [PATCH][AArch32]: Correct sdot RTL on aarch32
> 
> ping
> 
> > -Original Message-
> > From: Gcc-patches  On Behalf Of
> Tamar
> > Christina via Gcc-patches
> > Sent: Tuesday, May 25, 2021 3:58 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Earnshaw ; nd ;
> > Ramana Radhakrishnan 
> > Subject: [PATCH][AArch32]: Correct sdot RTL on aarch32
> >
> > Hi All,
> >
> > The RTL Generated from dot_prod is invalid as operand3
> > cannot be written to, it's a normal input.  For the expand it's just another
> > operand but the caller does not expect it to be written to.
> >
> > Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
> >
> > Ok for master? and backport to GCC 11, 10, 9?

Ok.
Thanks,
Kyrill

> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * config/arm/neon.md (dot_prod): Drop statements.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index
> >
> 61d81646475ce3bf62ece2cec2faf0c1fe978ec1..9602e9993aeebf4ec620d10
> 5fd
> > 20f64498a3b851 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -3067,13 +3067,7 @@ (define_expand "dot_prod"
> >  DOTPROD)
> > (match_operand:VCVTI 3 "register_operand")))]
> >"TARGET_DOTPROD"
> > -{
> > -  emit_insn (
> > -gen_neon_dot (operands[3], operands[3], operands[1],
> > -operands[2]));
> > -  emit_insn (gen_rtx_SET (operands[0], operands[3]));
> > -  DONE;
> > -})
> > +)
> >
> >  ;; Auto-vectorizer pattern for usdot
> >  (define_expand "usdot_prod"
> >
> >
> > --



Re: [PATCH] Replace conditional_replacement with match and simplify

2021-06-02 Thread Andrew Pinski via Gcc-patches
On Wed, Jun 2, 2021 at 2:12 AM Andrew Pinski  wrote:
>
> On Wed, Jun 2, 2021 at 1:37 AM Christophe Lyon via Gcc-patches
>  wrote:
> >
> > On Tue, 1 Jun 2021 at 08:06, apinski--- via Gcc-patches
> >  wrote:
> > >
> > > From: Andrew Pinski 
> > >
> > > This is the first of series of patches to simplify phi-opt
> > > to use match and simplify in many cases.  This simplification
> > > > will allow more things to be optimized.
> > >
> > > This is what Richard requested in
> > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
> > > and I think it is the right thing to do too.
> > >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> > >
> > > gcc/ChangeLog:
> > >
> > > * tree-ssa-phiopt.c (match_simplify_replacement):
> > > New function.
> > > (tree_ssa_phiopt_worker): Use match_simplify_replacement.
> > > (two_value_replacement): Change the comment about
> > > conditional_replacement.
> > > (conditional_replacement): Delete.
> >
> > Hi Andrew,
> >
> > This patch caused a regression on aarch64:
> > FAIL: gcc.target/aarch64/subs_compare_2.c scan-assembler-not
> > cmp\\tw[0-9]+, w[0-9]+
> > FAIL: gcc.target/aarch64/subs_compare_2.c scan-assembler-times
> > subs\\tw[0-9]+, w[0-9]+, [#]?4 1
> >
> > Can you check?
>
> Yes because it simplified the code to being on the gimple level to:
>   _5 = MIN_EXPR ;
>   _6 = _5 + -4;
>
> Which is the same as:
> int f(int a)
> {
>   if (a >= 4) a = 4;
>   return a - 4;
> }
> Which is correct also.

I filed this as PR 100874 with two extra testcases which show the
problem before hand even.

Thanks,
Andrew

>
> So the back-end needs to be improved slightly.
> It could match:
> (set (reg/i:SI 0 x0)
> (plus:SI (smin:SI (reg/v:SI 94 [ a ])
> (const_int 4 [0x4]))
> (const_int -4 [0xfffc])))
>
> Which then splits to:
> (insn:TI 51 41 18 (parallel [
> (set (reg:CC 66 cc)
> (compare:CC (reg/v:SI 0 x0 [orig:93 a ] [93])
> (const_int 4 [0x4])))
> (set (reg:SI 0 x0 [95])
> (plus:SI (reg/v:SI 0 x0 [orig:93 a ] [93])
> (const_int -4 [0xfffc])))
> ]) "t.c":9:7 283 {subsi3_compare1_imm}
>  (nil))
> (insn:TI 18 51 19 (set (reg/i:SI 0 x0)
> (if_then_else:SI (lt (reg:CC 66 cc)
> (const_int 0 [0]))
> (reg:SI 0 x0 [95])
> (const_int 0 [0]))) "t.c":12:1 455 {*cmovsi_insn}
>  (expr_list:REG_DEAD (reg:CC 66 cc)
> (nil)))
>
> We could also change the testcase return a different value such as doing:
> int
> foo (int a, int b)
> {
>   int x = a - 4;
>   if (a < 4)
> return x;
>   else
> return b;
> }
> Such that foo does not do a MIN :).
>
> Thanks,
> Andrew Pinski
>
> >
> > Thanks
> >
> > Christophe
> >
> > > ---
> > >  gcc/tree-ssa-phiopt.c | 144 --
> > >  1 file changed, 39 insertions(+), 105 deletions(-)
> > >
> > > diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> > > index e3bd18023a0..969b868397e 100644
> > > --- a/gcc/tree-ssa-phiopt.c
> > > +++ b/gcc/tree-ssa-phiopt.c
> > > @@ -53,8 +53,8 @@ along with GCC; see the file COPYING3.  If not see
> > >  static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
> > >  static bool two_value_replacement (basic_block, basic_block, edge, gphi 
> > > *,
> > >tree, tree);
> > > -static bool conditional_replacement (basic_block, basic_block,
> > > -edge, edge, gphi *, tree, tree);
> > > +static bool match_simplify_replacement (basic_block, basic_block,
> > > +   edge, edge, gphi *, tree, tree);
> > >  static gphi *factor_out_conditional_conversion (edge, edge, gphi *, 
> > > tree, tree,
> > > gimple *);
> > >  static int value_replacement (basic_block, basic_block,
> > > @@ -347,8 +347,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
> > > do_hoist_loads, bool early_p)
> > >   if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, 
> > > arg1))
> > > cfgchanged = true;
> > >   else if (!early_p
> > > -  && conditional_replacement (bb, bb1, e1, e2, phi,
> > > -  arg0, arg1))
> > > +  && match_simplify_replacement (bb, bb1, e1, e2, phi,
> > > + arg0, arg1))
> > > cfgchanged = true;
> > >   else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
> > > cfgchanged = true;
> > > @@ -675,7 +675,7 @@ two_value_replacement (basic_block cond_bb, 
> > > basic_block middle_bb,
> > >  }
> > >
> > >/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
> > > - conditional_replacement.  */
> > > + match_simplify_replacement.  */
> > >if (TREE_CODE 

[PATCH] predcom: Enabled by loop vect at O2 [PR100794]

2021-06-02 Thread Kewen.Lin via Gcc-patches
Hi,

As PR100794 shows, in the current implementation PRE bypasses
some optimizations to avoid introducing a loop-carried dependence
that would stop the loop vectorizer from vectorizing the loop.  At -O2,
there is no downstream pass to re-catch this kind of opportunity
if the loop vectorizer fails to vectorize that loop.

This patch follows Richi's suggestion in the PR: if the predcom flag
isn't explicitly set, enabling loop vectorization implicitly enables
predcom without any unrolling.  The Power9 SPEC2017 evaluation showed it
can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap
cost model, no remarkable impact at cheap cost model, the build
time and size impact is fine (see the PR for the details).

By the way, I tested another proposal to guard PRE not skip the
optimization for cheap and very-cheap vect cost models, the
evaluation results showed it's fine with very cheap cost model,
but it can degrade some bmks like 521.wrf_r -9.17% and
549.fotonik3d_r -2.07% etc.

Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

PR tree-optimization/100794
* tree-predcom.c (tree_predictive_commoning_loop): Add parameter
allow_unroll_p and only allow unrolling when it's true.
(tree_predictive_commoning): Add parameter allow_unroll_p and
adjust for it.
(run_tree_predictive_commoning): Likewise.
(class pass_predcom): Add private member allow_unroll_p.
(pass_predcom::pass_predcom): Init allow_unroll_p.
(pass_predcom::gate): Check flag_tree_loop_vectorize and 
global_options_set.x_flag_predictive_commoning.
(pass_predcom::execute): Adjust for allow_unroll_p.

gcc/testsuite/ChangeLog:

PR tree-optimization/100794
* gcc.dg/tree-ssa/pr100794.c: New test.
 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +
 gcc/tree-predcom.c   | 57 +---
 2 files changed, 60 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
new file mode 100644
index 000..6f707ae7fba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details -fdisable-tree-vect" } */
+
+extern double arr[100];
+extern double foo (double, double);
+extern double sum;
+
+void
+test (int i_0, int i_n)
+{
+  int i;
+  for (i = i_0; i < i_n - 1; i++)
+{
+  double a = arr[i];
+  double b = arr[i + 1];
+  sum += a * b;
+}
+}
+
+/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 02f911a08bb..65a93c8e505 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec chains)
applied to this loop.  */
 
 static unsigned
-tree_predictive_commoning_loop (class loop *loop)
+tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
 {
   vec datarefs;
   vec dependences;
   struct component *components;
   vec chains = vNULL;
-  unsigned unroll_factor;
+  unsigned unroll_factor = 0;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
 
@@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop)
   dump_chains (dump_file, chains);
 }
 
-  /* Determine the unroll factor, and if the loop should be unrolled, ensure
- that its number of iterations is divisible by the factor.  */
-  unroll_factor = determine_unroll_factor (chains);
-  unroll = (unroll_factor > 1
-   && can_unroll_loop_p (loop, unroll_factor, &desc));
+  if (allow_unroll_p)
+/* Determine the unroll factor, and if the loop should be unrolled, ensure
+   that its number of iterations is divisible by the factor.  */
+unroll_factor = determine_unroll_factor (chains);
+
+  if (unroll_factor > 1)
+unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
 
   /* Execute the predictive commoning transformations, and possibly unroll the
  loop.  */
@@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop)
 /* Runs predictive commoning.  */
 
 unsigned
-tree_predictive_commoning (void)
+tree_predictive_commoning (bool allow_unroll_p)
 {
   class loop *loop;
   unsigned ret = 0, changed = 0;
@@ -3328,7 +3330,7 @@ tree_predictive_commoning (void)
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
 if (optimize_loop_for_speed_p (loop))
   {
-   changed |= tree_predictive_commoning_loop (loop);
+   changed |= tree_predictive_commoning_loop (loop, allow_unroll_p);
   }
   free_original_copy_tables ();
 
@@ -3355,12 +3357,12 @@ tree_predictive_commoning (void)
 /* Predictive commoning Pass.  */
 
 static unsigned
-run_tree_predictive_commoning (struct function *fun)

[PATCH] predcom: Adjust some unnecessary update_ssa calls

2021-06-02 Thread Kewen.Lin via Gcc-patches
Hi,

As Richi suggested in PR100794, this patch removes some
unnecessary update_ssa calls with flag
TODO_update_ssa_only_virtuals, and also does some refactoring.

Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu, built well
on Power9 ppc64le with --with-build-config=bootstrap-O3,
and passed both P8 and P9 SPEC2017 full build with
{-O3, -Ofast} + {,-funroll-loops}.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

* tree-predcom.c (execute_pred_commoning): Remove update_ssa call.
(tree_predictive_commoning_loop): Factor some cleanup stuffs into
lambda function cleanup, remove scev_reset call, and adjust return
value.
(tree_predictive_commoning): Adjust for different changed values,
only set flag TODO_update_ssa_only_virtuals if changed.
(pass_data pass_data_predcom): Remove TODO_update_ssa_only_virtuals
from todo_flags_finish.

 gcc/tree-predcom.c | 50 ++
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 5482f50198a..02f911a08bb 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -2280,8 +2280,6 @@ execute_pred_commoning (class loop *loop, vec 
chains,
remove_stmt (a->stmt);
}
 }
-
-  update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* For each reference in CHAINS, if its defining statement is
@@ -3174,9 +3172,10 @@ insert_init_seqs (class loop *loop, vec chains)
   }
 }
 
-/* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
-   if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
-   form was corrupted.  */
+/* Performs predictive commoning for LOOP.  Sets bit 1<<1 of return value
+   if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
+   form was corrupted.  Non-zero return value indicates some changes were
+   applied to this loop.  */
 
 static unsigned
 tree_predictive_commoning_loop (class loop *loop)
@@ -3188,7 +3187,6 @@ tree_predictive_commoning_loop (class loop *loop)
   unsigned unroll_factor;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
-  edge exit;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, "Processing loop %d\n",  loop->num);
@@ -3244,13 +3242,22 @@ tree_predictive_commoning_loop (class loop *loop)
   determine_roots (loop, components, );
   release_components (components);
 
+  auto cleanup = [&]() {
+release_chains (chains);
+free_data_refs (datarefs);
+BITMAP_FREE (looparound_phis);
+free_affine_expand_cache (_expansions);
+  };
+
   if (!chains.exists ())
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
 "Predictive commoning failed: no suitable chains\n");
-  goto end;
+  cleanup ();
+  return 0;
 }
+
   prepare_initializers (loop, chains);
   loop_closed_ssa = prepare_finalizers (loop, chains);
 
@@ -3268,10 +3275,8 @@ tree_predictive_commoning_loop (class loop *loop)
   /* Determine the unroll factor, and if the loop should be unrolled, ensure
  that its number of iterations is divisible by the factor.  */
   unroll_factor = determine_unroll_factor (chains);
-  scev_reset ();
   unroll = (unroll_factor > 1
 && can_unroll_loop_p (loop, unroll_factor, &desc));
-  exit = single_dom_exit (loop);
 
   /* Execute the predictive commoning transformations, and possibly unroll the
  loop.  */
@@ -3285,8 +3290,6 @@ tree_predictive_commoning_loop (class loop *loop)
   dta.chains = chains;
   dta.tmp_vars = tmp_vars;
 
-  update_ssa (TODO_update_ssa_only_virtuals);
-
   /* Cfg manipulations performed in tree_transform_and_unroll_loop before
 execute_pred_commoning_cbck is called may cause phi nodes to be
 reallocated, which is a problem since CHAINS may point to these
@@ -3295,6 +3298,7 @@ tree_predictive_commoning_loop (class loop *loop)
 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
   replace_phis_by_defined_names (chains);
 
+  edge exit = single_dom_exit (loop);
       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
  execute_pred_commoning_cbck, &dta);
   eliminate_temp_copies (loop, tmp_vars);
@@ -3307,14 +3311,9 @@ tree_predictive_commoning_loop (class loop *loop)
   execute_pred_commoning (loop, chains, tmp_vars);
 }
 
-end: ;
-  release_chains (chains);
-  free_data_refs (datarefs);
-  BITMAP_FREE (looparound_phis);
+  cleanup ();
 
-  free_affine_expand_cache (_expansions);
-
-  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
+  return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
 }
 
 /* Runs predictive commoning.  */
@@ -3335,12 +3334,19 @@ tree_predictive_commoning (void)
 
   if (changed > 0)
 {
-  scev_reset ();
+  ret = TODO_update_ssa_only_virtuals;
 
+  /* Some 

  1   2   >